From 1e74904881f2f8f38d9c3c25e23d21cda621b4f0 Mon Sep 17 00:00:00 2001 From: midavadim Date: Thu, 2 Nov 2023 12:24:47 +0200 Subject: [PATCH] :tada: Source pinterest - handle non json response (#32078) Co-authored-by: midavadim --- .../connectors/source-pinterest/metadata.yaml | 2 +- .../source_pinterest/streams.py | 13 ++++- .../unit_tests/test_streams.py | 13 +++++ docs/integrations/sources/pinterest.md | 55 ++++++++++--------- 4 files changed, 53 insertions(+), 30 deletions(-) diff --git a/airbyte-integrations/connectors/source-pinterest/metadata.yaml b/airbyte-integrations/connectors/source-pinterest/metadata.yaml index a66c92061de3..5ce3b8a4ca19 100644 --- a/airbyte-integrations/connectors/source-pinterest/metadata.yaml +++ b/airbyte-integrations/connectors/source-pinterest/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: 5cb7e5fe-38c2-11ec-8d3d-0242ac130003 - dockerImageTag: 0.7.0 + dockerImageTag: 0.7.1 dockerRepository: airbyte/source-pinterest connectorBuildOptions: baseImage: docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py b/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py index 74add04ca3df..906e707b976d 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/streams.py @@ -20,6 +20,10 @@ MAX_RATE_LIMIT_CODE = 8 +class NonJSONResponse(Exception): + pass + + class PinterestStream(HttpStream, ABC): url_base = "https://api.pinterest.com/v5/" primary_key = "id" @@ -69,8 +73,13 @@ def parse_response(self, response: requests.Response, stream_state: Mapping[str, yield record def should_retry(self, response: requests.Response) -> bool: - if isinstance(response.json(), dict): - self.max_rate_limit_exceeded = 
response.json().get("code", 0) == MAX_RATE_LIMIT_CODE + try: + resp = response.json() + except requests.exceptions.JSONDecodeError: + raise NonJSONResponse(f"Received unexpected response in non json format: '{response.text}'") + + if isinstance(resp, dict): + self.max_rate_limit_exceeded = resp.get("code", 0) == MAX_RATE_LIMIT_CODE # when max rate limit exceeded, we should skip the stream. if response.status_code == requests.codes.too_many_requests and self.max_rate_limit_exceeded: self.logger.error(f"For stream {self.name} Max Rate Limit exceeded.") diff --git a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py index 9558dd8634c5..a13930e3daab 100644 --- a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_streams.py @@ -23,6 +23,7 @@ Campaigns, PinterestStream, PinterestSubStream, + UserAccountAnalytics, ) os.environ["REQUEST_CACHE_PATH"] = "/tmp" @@ -115,6 +116,18 @@ def test_should_retry_on_max_rate_limit_error(requests_mock, test_response, stat assert result == expected +def test_non_json_response(requests_mock): + stream = UserAccountAnalytics(parent=None, config=MagicMock()) + url = "https://api.pinterest.com/v5/boards" + requests_mock.get("https://api.pinterest.com/v5/boards", text="some response", status_code=200) + response = requests.get(url) + try: + stream.should_retry(response) + assert False + except Exception as e: + assert "Received unexpected response in non json format" in str(e) + + @pytest.mark.parametrize( "test_response, test_headers, status_code, expected", [ diff --git a/docs/integrations/sources/pinterest.md b/docs/integrations/sources/pinterest.md index ef5cbd09c3fb..fa54c2ed62b4 100644 --- a/docs/integrations/sources/pinterest.md +++ b/docs/integrations/sources/pinterest.md @@ -72,30 +72,31 @@ The connector is restricted by the Pinterest 
[requests limitation](https://devel ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------| :------------------------------------------------------- |:---------------------------------------------------------------------------------------------------------------------| -| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | -| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | -| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | -| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | -| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | -| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | -| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | -| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | -| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | -| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | -| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | -| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 
days | -| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | -| 0.2.0 | 2022-12-13 | [20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | -| 0.1.9 | 2022-09-06 | [15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based on statuses | -| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | -| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. | -| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | -| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | -| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | -| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | -| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | -| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | -| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector | +| Version | Date | Pull Request | Subject | +|:--------|:-----------| :------------------------------------------------------- |:--------------------------------------------------------------------------------------------------------------| +| 0.7.1 | 2023-11-01 | [32078](https://github.com/airbytehq/airbyte/pull/32078) | handle 
non-JSON response | +| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token-based authentication method because access_token is valid for 1 day only | +| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | +| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | +| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | +| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | +| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | +| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | +| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | +| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | +| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | +| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | +| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 days | +| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | +| 0.2.0 | 2022-12-13 | 
[20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | +| 0.1.9 | 2022-09-06 | [15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based on statuses | +| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | +| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. | +| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | +| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | +| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | +| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | +| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | +| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | +| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector |