From b81d942aaea32cd040d9028a93461b87577a7523 Mon Sep 17 00:00:00 2001 From: btkcodedev Date: Wed, 29 May 2024 03:53:15 +0530 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8Source=20NewsData:=20Make=20Connector?= =?UTF-8?q?=20Compatible=20with=20Builder=20(#38731)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Alexandre Girard --- .../connectors/source-newsdata/icon.svg | 5 + .../connectors/source-newsdata/metadata.yaml | 2 +- .../connectors/source-newsdata/pyproject.toml | 2 +- .../source-newsdata/requirements.txt | 1 - .../source_newsdata/manifest.yaml | 636 ++++++++++++------ .../source-newsdata/source_newsdata/spec.yaml | 198 ------ docs/integrations/sources/newsdata.md | 1 + 7 files changed, 439 insertions(+), 406 deletions(-) create mode 100644 airbyte-integrations/connectors/source-newsdata/icon.svg delete mode 100644 airbyte-integrations/connectors/source-newsdata/requirements.txt delete mode 100644 airbyte-integrations/connectors/source-newsdata/source_newsdata/spec.yaml diff --git a/airbyte-integrations/connectors/source-newsdata/icon.svg b/airbyte-integrations/connectors/source-newsdata/icon.svg new file mode 100644 index 0000000000000..574ad08bf6d81 --- /dev/null +++ b/airbyte-integrations/connectors/source-newsdata/icon.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/airbyte-integrations/connectors/source-newsdata/metadata.yaml b/airbyte-integrations/connectors/source-newsdata/metadata.yaml index 33d96c0a11d92..2ce5752c493e6 100644 --- a/airbyte-integrations/connectors/source-newsdata/metadata.yaml +++ b/airbyte-integrations/connectors/source-newsdata/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: 60bd11d8-2632-4daa-a688-b47336d32093 - dockerImageTag: 0.1.3 + dockerImageTag: 0.1.4 dockerRepository: airbyte/source-newsdata documentationUrl: https://docs.airbyte.com/integrations/sources/newsdata githubIssueLabel: source-newsdata diff --git a/airbyte-integrations/connectors/source-newsdata/pyproject.toml b/airbyte-integrations/connectors/source-newsdata/pyproject.toml index 15f3b6e3f597f..fe14275e5c93b 100644 --- a/airbyte-integrations/connectors/source-newsdata/pyproject.toml +++ b/airbyte-integrations/connectors/source-newsdata/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "0.1.3" +version = "0.1.4" name = "source-newsdata" description = "Source implementation for Newsdata." authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-newsdata/requirements.txt b/airbyte-integrations/connectors/source-newsdata/requirements.txt deleted file mode 100644 index d6e1198b1ab1f..0000000000000 --- a/airbyte-integrations/connectors/source-newsdata/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --e . diff --git a/airbyte-integrations/connectors/source-newsdata/source_newsdata/manifest.yaml b/airbyte-integrations/connectors/source-newsdata/source_newsdata/manifest.yaml index 87a8c25cb82a4..689d9cb786ea4 100644 --- a/airbyte-integrations/connectors/source-newsdata/source_newsdata/manifest.yaml +++ b/airbyte-integrations/connectors/source-newsdata/source_newsdata/manifest.yaml @@ -1,214 +1,440 @@ -version: "0.29.0" +version: 0.79.1 + +type: DeclarativeSource + +check: + type: CheckStream + stream_names: + - latest + - sources definitions: - selector: - extractor: - field_path: ["results"] + streams: + latest: + type: DeclarativeStream + name: latest + primary_key: + - link + retriever: + type: SimpleRetriever + requester: + $ref: "#/definitions/base_requester" + path: /news + http_method: GET + request_parameters: + country: "{{ ','.join(config['country']) }}" + language: "{{ ','.join(config['language']) }}" + category: "{{ ','.join(config['category']) }}" + q: "{{ config['query'] | urlencode }}" + qInTitle: "{{ config['query_in_title'] | urlencode }}" + domain: "{{ ','.join(config['domain']) }}" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - results + paginator: + type: DefaultPaginator + page_token_option: + type: RequestOption + inject_into: request_parameter + field_name: page + page_size_option: + type: RequestOption + field_name: X-Pagination-Page-Size + inject_into: header + pagination_strategy: + type: CursorPagination + page_size: 10 + cursor_value: "{{ response['nextPage'] }}" + schema_loader: + type: InlineSchemaLoader + schema: + $ref: "#/schemas/latest" + sources: + type: DeclarativeStream + name: sources + primary_key: + - id + retriever: + type: SimpleRetriever + requester: + $ref: "#/definitions/base_requester" + path: /sources + http_method: GET + request_parameters: + country: "{{ config['country'][0] }}" + language: "{{ config['language'][0] }}" + category: "{{ config['category'][0] }}" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - results + schema_loader: + type: InlineSchemaLoader + schema: + $ref: "#/schemas/sources" base_requester: - url_base: "https://newsdata.io/api/1" - http_method: "GET" + type: HttpRequester + url_base: https://newsdata.io/api/1 authenticator: type: ApiKeyAuthenticator - header: "X-ACCESS-KEY" api_token: "{{ config['api_key'] }}" - base_retriever: - record_selector: - $ref: "#/definitions/selector" - base_stream: - retriever: - $ref: "#/definitions/base_retriever" - requester: - $ref: "#/definitions/base_requester" - cursor_paginator: - type: "DefaultPaginator" - pagination_strategy: - type: "CursorPagination" - cursor_value: "{{ response['nextPage'] }}" - page_size: 10 - # TODO: make page_size dynamic, depending on free or paid tier. See https://github.com/airbytehq/airbyte/issues/18783 - page_token_option: - type: RequestOption - field_name: "page" - inject_into: "request_parameter" - page_size_option: # This is useless, only there because it is required, but page sizes are managed automatically by API subscription type - field_name: "X-Pagination-Page-Size" - inject_into: "header" - latest_stream: - $ref: "#/definitions/base_stream" - $parameters: - name: "latest" - primary_key: "link" - path: "/news" - retriever: - $ref: "#/definitions/base_retriever" - requester: - $ref: "#/definitions/base_requester" - request_parameters: - country: "{{ ','.join(config['country']) }}" - language: "{{ ','.join(config['language']) }}" - category: "{{ ','.join(config['category']) }}" - q: "{{ config['query'] | urlencode }}" - qInTitle: "{{ config['query_in_title'] | urlencode }}" - domain: "{{ ','.join(config['domain']) }}" - paginator: - $ref: "#/definitions/cursor_paginator" - schema_loader: - type: InlineSchemaLoader - schema: - $schema: http://json-schema.org/draft-07/schema# - type: object - properties: - title: - description: The title or headline of the news article - type: - - "null" - - string - link: - description: URL link to the full news article - type: - - "null" - - string - source_id: - description: Unique identifier of the news source - type: - - "null" - - string - keywords: - description: Keywords or tags associated with the news article - type: - - "null" - - array - items: - type: - - "null" - - string - creator: - description: The creator or author of the news article - type: - - "null" - - array - items: - type: - - "null" - - string - image_url: - description: URL of the image associated with the news article - type: - - "null" - - string - video_url: - description: URL of any video associated with the news article - type: - - "null" - - string - description: - description: A brief summary or description of the news article - type: - - "null" - - string - pubDate: - description: The publication date of the news article - type: - - "null" - - string - content: - description: The main content or text of the news article - type: - - "null" - - string - country: - description: The country where the news article originated - type: - - "null" - - array - items: - type: - - "null" - - string - category: - description: The category or topic of the news article - type: - - "null" - - array - items: - type: string - language: - description: The language in which the news article is written - type: - - "null" - - string - sources_stream: - $ref: "#/definitions/base_stream" - $parameters: - name: "sources" - primary_key: "id" - path: "/sources" - retriever: - $ref: "#/definitions/base_retriever" - requester: - $ref: "#/definitions/base_requester" - request_parameters: - country: "{{ config['country'][0] }}" - language: "{{ config['language'][0] }}" - category: "{{ config['category'][0] }}" + inject_into: + type: RequestOption + field_name: X-ACCESS-KEY + inject_into: header - schema_loader: - type: InlineSchemaLoader - schema: - $schema: http://json-schema.org/draft-07/schema# - type: object - properties: - id: - description: The unique identifier of the news source. - type: - - "null" - - string - name: - description: The name of the news source. - type: - - "null" - - string - url: - description: The URL of the news source. - type: - - "null" - - string - category: - description: - The category of the news source, e.g., business, entertainment, - general, health, science, sports, technology, etc. - type: - - "null" - - array - items: - type: - - "null" - - string - language: - description: - The language in which the news source publishes its content, - e.g., en, fr, de, es, etc. - type: - - "null" - - array - items: - type: - - "null" - - string - country: - description: The country in which the news source is based or covers primarily. - type: - - "null" - - array - items: - type: - - "null" - - string streams: - - "#/definitions/latest_stream" - - "#/definitions/sources_stream" + - $ref: "#/definitions/streams/latest" + - $ref: "#/definitions/streams/sources" -check: - stream_names: - - "latest" - - "sources" +spec: + type: Spec + connection_specification: + type: object + $schema: http://json-schema.org/draft-07/schema# + required: + - api_key + properties: + api_key: + type: string + title: API Key + airbyte_secret: true + description: API Key + order: 0 + domain: + type: array + description: >- + Domains (maximum 5) to restrict the search to. Use the sources stream + to find top sources id. + maxitems: 5 + items: + type: string + order: 1 + country: + type: array + description: 2-letter ISO 3166-1 countries (maximum 5) to restrict the search to. + maxitems: 5 + order: 2 + items: + type: string + enum: + - ar + - au + - at + - bd + - by + - be + - br + - bg + - ca + - cl + - cn + - co + - cr + - cu + - cz + - dk + - do + - ec + - eg + - ee + - et + - fi + - fr + - de + - gr + - hk + - hu + - in + - id + - iq + - ie + - il + - it + - jp + - kz + - kw + - lv + - lb + - lt + - my + - mx + - ma + - mm + - nl + - nz + - ng + - kp + - "no" + - pk + - pe + - ph + - pl + - pt + - pr + - ro + - ru + - sa + - rs + - sg + - sk + - si + - za + - kr + - es + - se + - ch + - tw + - tz + - th + - tr + - ua + - ae + - gb + - us + - ve + - vi + category: + type: array + description: Categories (maximum 5) to restrict the search to. + maxitems: 5 + order: 3 + items: + type: string + enum: + - business + - entertainment + - environment + - food + - health + - politics + - science + - sports + - technology + - top + - world + language: + type: array + description: Languages (maximum 5) to restrict the search to. + maxitems: 5 + order: 4 + items: + type: string + enum: + - be + - am + - ar + - bn + - bs + - bg + - my + - ckb + - zh + - hr + - cs + - da + - nl + - en + - et + - fi + - fr + - de + - el + - he + - hi + - hu + - in + - it + - jp + - ko + - lv + - lt + - ms + - "no" + - pl + - pt + - ro + - ru + - sr + - sk + - sl + - es + - sw + - sv + - th + - tr + - uk + - ur + - vi + OneOf: + query: + type: string + description: >- + Keywords or phrases to search for in the news title and content. + Advanced Search options: + - Search `Social`: query = "social" + - Search `Social Pizza`: query = "social pizza" + - Search `Social` but not with `pizza`: query = "social -pizza" + - Search `Social` but not with `pizza` and `wildfire`: query = "social -pizza -wildfire" + - Search `Social` and `pizza`: query = "social AND pizza" + - Search `Social` and `pizza` and `pasta`: query = "social AND pizza AND pasta" + - Search `Social` or `pizza`: query = "social OR pizza" + - Search `Social` or `pizza` but not `pasta`: query = "social OR pizza -pasta" + - Search `Social` or `pizza` or `pasta`: query = "social OR pizza OR pasta" + Note: You can't use AND and OR in the same query. + order: 1 + query_in_title: + type: string + description: >- + Same as `query`, but restricting the search to only the news title. + It cannot be used along with `query`. + order: 1 + order: 5 + additionalProperties: true + +metadata: + autoImportSchema: + latest: false + sources: false + +schemas: + latest: + type: object + $schema: http://json-schema.org/draft-07/schema# + properties: + category: + type: + - "null" + - array + description: The category or topic of the news article + items: + type: string + content: + type: + - "null" + - string + description: The main content or text of the news article + country: + type: + - "null" + - array + description: The country where the news article originated + items: + type: + - "null" + - string + creator: + type: + - "null" + - array + description: The creator or author of the news article + items: + type: + - "null" + - string + description: + type: + - "null" + - string + description: A brief summary or description of the news article + image_url: + type: + - "null" + - string + description: URL of the image associated with the news article + keywords: + type: + - "null" + - array + description: Keywords or tags associated with the news article + items: + type: + - "null" + - string + language: + type: + - "null" + - string + description: The language in which the news article is written + link: + type: + - "null" + - string + description: URL link to the full news article + pubDate: + type: + - "null" + - string + description: The publication date of the news article + source_id: + type: + - "null" + - string + description: Unique identifier of the news source + title: + type: + - "null" + - string + description: The title or headline of the news article + video_url: + type: + - "null" + - string + description: URL of any video associated with the news article + additionalProperties: true + sources: + type: object + $schema: http://json-schema.org/draft-07/schema# + properties: + category: + type: + - "null" + - array + description: >- + The category of the news source, e.g., business, entertainment, + general, health, science, sports, technology, etc. + items: + type: + - "null" + - string + country: + type: + - "null" + - array + description: The country in which the news source is based or covers primarily. + items: + type: + - "null" + - string + id: + type: + - "null" + - string + description: The unique identifier of the news source. + language: + type: + - "null" + - array + description: >- + The language in which the news source publishes its content, e.g., en, + fr, de, es, etc. + items: + type: + - "null" + - string + name: + type: + - "null" + - string + description: The name of the news source. + url: + type: + - "null" + - string + description: The URL of the news source. + additionalProperties: true diff --git a/airbyte-integrations/connectors/source-newsdata/source_newsdata/spec.yaml b/airbyte-integrations/connectors/source-newsdata/source_newsdata/spec.yaml deleted file mode 100644 index dd186f2a5c96b..0000000000000 --- a/airbyte-integrations/connectors/source-newsdata/source_newsdata/spec.yaml +++ /dev/null @@ -1,198 +0,0 @@ -documentationUrl: https://docs.airbyte.com/integrations/sources/newsdata -connectionSpecification: - $schema: http://json-schema.org/draft-07/schema# - title: Newsdata Spec - type: object - required: - - api_key - additionalProperties: true - properties: - api_key: - type: string - description: API Key - airbyte_secret: true - order: 0 - OneOf: - query: - type: string - description: >- - Keywords or phrases to search for in the news title and content. - Advanced Search options: - - Search `Social`: query = "social" - - Search `Social Pizza`: query = "social pizza" - - Search `Social` but not with `pizza`: query = "social -pizza" - - Search `Social` but not with `pizza` and `wildfire`: query = "social -pizza -wildfire" - - Search `Social` and `pizza`: query = "social AND pizza" - - Search `Social` and `pizza` and `pasta`: query = "social AND pizza AND pasta" - - Search `Social` or `pizza`: query = "social OR pizza" - - Search `Social` or `pizza` but not `pasta`: query = "social OR pizza -pasta" - - Search `Social` or `pizza` or `pasta`: query = "social OR pizza OR pasta" - Note: You can't use AND and OR in the same query. - order: 1 - query_in_title: - type: string - description: Same as `query`, but restricting the search to only the news title. It cannot be used along with `query`. - order: 1 - domain: - type: array - description: Domains (maximum 5) to restrict the search to. Use the sources stream to find top sources id. - maxitems: 5 - items: - type: string - order: 2 - country: - type: array - description: 2-letter ISO 3166-1 countries (maximum 5) to restrict the search to. - maxitems: 5 - order: 3 - items: - type: string - enum: - - ar - - au - - at - - bd - - by - - be - - br - - bg - - ca - - cl - - cn - - co - - cr - - cu - - cz - - dk - - do - - ec - - eg - - ee - - et - - fi - - fr - - de - - gr - - hk - - hu - - in - - id - - iq - - ie - - il - - it - - jp - - kz - - kw - - lv - - lb - - lt - - my - - mx - - ma - - mm - - nl - - nz - - ng - - kp - - "no" - - pk - - pe - - ph - - pl - - pt - - pr - - ro - - ru - - sa - - rs - - sg - - sk - - si - - za - - kr - - es - - se - - ch - - tw - - tz - - th - - tr - - ua - - ae - - gb - - us - - ve - - vi - category: - type: array - description: Categories (maximum 5) to restrict the search to. - maxitems: 5 - order: 4 - items: - type: string - enum: - - business - - entertainment - - environment - - food - - health - - politics - - science - - sports - - technology - - top - - world - language: - type: array - description: Languages (maximum 5) to restrict the search to. - maxitems: 5 - order: 5 - items: - type: string - enum: - - be - - am - - ar - - bn - - bs - - bg - - my - - ckb - - zh - - hr - - cs - - da - - nl - - en - - et - - fi - - fr - - de - - el - - he - - hi - - hu - - in - - it - - jp - - ko - - lv - - lt - - ms - - "no" - - pl - - pt - - ro - - ru - - sr - - sk - - sl - - es - - sw - - sv - - th - - tr - - uk - - ur - - vi diff --git a/docs/integrations/sources/newsdata.md b/docs/integrations/sources/newsdata.md index aa826b689fc2a..bc9f749b94253 100644 --- a/docs/integrations/sources/newsdata.md +++ b/docs/integrations/sources/newsdata.md @@ -45,6 +45,7 @@ The following fields are required fields for the connector to work: | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------ | +| 0.1.4 | 2024-05-28 | [38731](https://github.com/airbytehq/airbyte/pull/38731) | Make compatible with the builder | | 0.1.3 | 2024-04-19 | [37203](https://github.com/airbytehq/airbyte/pull/37203) | Upgrade to CDK 0.80.0 and manage dependencies with Poetry. | | 0.1.2 | 2024-04-15 | [37203](https://github.com/airbytehq/airbyte/pull/37203) | Base image migration: remove Dockerfile and use the python-connector-base image | | 0.1.1 | 2024-04-12 | [37203](https://github.com/airbytehq/airbyte/pull/37203) | schema descriptions |