diff --git a/airbyte-integrations/connectors/source-mixpanel/Dockerfile b/airbyte-integrations/connectors/source-mixpanel/Dockerfile index d4de3e177096..99c4dfa0e603 100644 --- a/airbyte-integrations/connectors/source-mixpanel/Dockerfile +++ b/airbyte-integrations/connectors/source-mixpanel/Dockerfile @@ -13,5 +13,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.41 +LABEL io.airbyte.version=1.0.0 LABEL io.airbyte.name=airbyte/source-mixpanel diff --git a/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml b/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml index 9551a1276369..bb8fe872ccec 100644 --- a/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-mixpanel/acceptance-test-config.yml @@ -1,6 +1,7 @@ # See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) # for more information about how to configure these tests connector_image: airbyte/source-mixpanel:dev +# custom configuration is used for tests to speed up testing and avoid hitting rate limits custom_environment_variables: REQS_PER_HOUR_LIMIT: 0 AVAILABLE_TESTING_RANGE_DAYS: 10 diff --git a/airbyte-integrations/connectors/source-mixpanel/integration_tests/configured_catalog_incremental.json b/airbyte-integrations/connectors/source-mixpanel/integration_tests/configured_catalog_incremental.json index 13e1c2ae450f..453ec0d2631f 100644 --- a/airbyte-integrations/connectors/source-mixpanel/integration_tests/configured_catalog_incremental.json +++ b/airbyte-integrations/connectors/source-mixpanel/integration_tests/configured_catalog_incremental.json @@ -21,12 +21,12 @@ "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, "default_cursor_field": ["time"], - 
"source_defined_primary_key": [["distinct_id"]] + "source_defined_primary_key": [["distinct_id"], ["event"], ["time"]] }, "sync_mode": "incremental", "destination_sync_mode": "append", "cursor_field": ["time"], - "primary_key": [["distinct_id"]] + "primary_key": [["distinct_id"], ["event"], ["time"]] }, { "stream": { diff --git a/airbyte-integrations/connectors/source-mixpanel/metadata.yaml b/airbyte-integrations/connectors/source-mixpanel/metadata.yaml index e8494fa4a981..c48242555516 100644 --- a/airbyte-integrations/connectors/source-mixpanel/metadata.yaml +++ b/airbyte-integrations/connectors/source-mixpanel/metadata.yaml @@ -6,7 +6,7 @@ data: connectorSubtype: api connectorType: source definitionId: 12928b32-bf0a-4f1e-964f-07e12e37153a - dockerImageTag: 0.1.41 + dockerImageTag: 1.0.0 dockerRepository: airbyte/source-mixpanel githubIssueLabel: source-mixpanel icon: mixpanel.svg @@ -18,6 +18,11 @@ data: oss: enabled: true releaseStage: generally_available + releases: + breakingChanges: + 1.0.0: + message: In this release, the datetime field of stream engage has had its type changed from date-time to string due to inconsistent data from Mixpanel. Additionally, the primary key for stream export has been fixed to uniquely identify records. Users will need to refresh the source schema and reset affected streams after upgrading. 
+ upgradeDeadline: "2023-10-31" suggestedStreams: streams: - export diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/engage.py b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/engage.py index 46aa7593d949..9a52b847f09a 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/engage.py +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/engage.py @@ -184,7 +184,8 @@ def get_json_schema(self) -> Mapping[str, Any]: types = { "boolean": {"type": ["null", "boolean"]}, "number": {"type": ["null", "number"], "multipleOf": 1e-20}, - "datetime": {"type": ["null", "string"], "format": "date-time"}, + # no format specified as values can be "2021-12-16T00:00:00", "1638298874", "15/08/53895" + "datetime": {"type": ["null", "string"]}, "object": {"type": ["null", "object"], "additionalProperties": True}, "list": {"type": ["null", "array"], "required": False, "items": {}}, "string": {"type": ["null", "string"]}, diff --git a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/export.py b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/export.py index 05b8d636625a..d35e428a82f4 100644 --- a/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/export.py +++ b/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/streams/export.py @@ -75,7 +75,7 @@ class Export(DateSlicesMixin, IncrementalMixpanelStream): 3 queries per second and 60 queries per hour. 
""" - primary_key: str = "distinct_id" + primary_key: str = ["distinct_id", "event", "time"] cursor_field: str = "time" transformer = TypeTransformer(TransformConfig.DefaultSchemaNormalization) diff --git a/airbyte-integrations/connectors/source-mixpanel/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-mixpanel/unit_tests/test_streams.py index 3d365508f4dd..de54d1c6b89f 100644 --- a/airbyte-integrations/connectors/source-mixpanel/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-mixpanel/unit_tests/test_streams.py @@ -279,23 +279,47 @@ def engage_schema_response(): 200, { "results": { - "$browser": {"count": 124, "type": "string"}, - "$browser_version": {"count": 124, "type": "string"}, "$created": {"count": 124, "type": "string"}, + "$is_active": {"count": 412, "type": "boolean"}, + "$CreatedDateTimestamp": {"count": 300, "type": "number"}, + "$CreatedDate": {"count": 11, "type": "datetime"}, + "$properties": {"count": 2, "type": "object"}, + "$tags": {"count": 131, "type": "list"}, } }, ) def test_engage_schema(requests_mock, engage_schema_response, config): - - stream = EngageSchema(authenticator=MagicMock(), **config) - requests_mock.register_uri("GET", get_url_to_mock(stream), engage_schema_response) - - records = stream.read_records(sync_mode=SyncMode.full_refresh) - - records_length = sum(1 for _ in records) - assert records_length == 3 + stream = Engage(authenticator=MagicMock(), **config) + requests_mock.register_uri("GET", get_url_to_mock(EngageSchema(authenticator=MagicMock(), **config)), engage_schema_response) + assert stream.get_json_schema() == { + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "CreatedDate": {"type": ["null", "string"]}, + "CreatedDateTimestamp": {"multipleOf": 1e-20, "type": ["null", "number"]}, + "browser": {"type": ["null", "string"]}, + "browser_version": {"type": ["null", "string"]}, + "city": {"type": ["null", "string"]}, + 
"country_code": {"type": ["null", "string"]}, + "created": {"type": ["null", "string"]}, + "distinct_id": {"type": ["null", "string"]}, + "email": {"type": ["null", "string"]}, + "first_name": {"type": ["null", "string"]}, + "id": {"type": ["null", "string"]}, + "is_active": {"type": ["null", "boolean"]}, + "last_name": {"type": ["null", "string"]}, + "last_seen": {"format": "date-time", "type": ["null", "string"]}, + "name": {"type": ["null", "string"]}, + "properties": {"additionalProperties": True, "type": ["null", "object"]}, + "region": {"type": ["null", "string"]}, + "tags": {"items": {}, "required": False, "type": ["null", "array"]}, + "timezone": {"type": ["null", "string"]}, + "unblocked": {"type": ["null", "string"]}, + }, + "type": "object", + } def test_update_engage_schema(requests_mock, config): diff --git a/docs/integrations/sources/mixpanel-migrations.md b/docs/integrations/sources/mixpanel-migrations.md new file mode 100644 index 000000000000..316406819e71 --- /dev/null +++ b/docs/integrations/sources/mixpanel-migrations.md @@ -0,0 +1,5 @@ +# Mixpanel Migration Guide + +## Upgrading to 1.0.0 + +In this release, the datetime field of stream engage has had its type changed from date-time to string due to inconsistent data from Mixpanel. Additionally, the primary key for stream export has been fixed to uniquely identify records. Users will need to refresh the source schema and reset affected streams after upgrading. 
diff --git a/docs/integrations/sources/mixpanel.md b/docs/integrations/sources/mixpanel.md index 3fab41988ae3..06af62fb3aa4 100644 --- a/docs/integrations/sources/mixpanel.md +++ b/docs/integrations/sources/mixpanel.md @@ -49,46 +49,47 @@ Syncing huge date windows may take longer due to Mixpanel's low API rate-limits ## CHANGELOG -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------| -| 0.1.41 | 2023-09-26 | [30149](https://github.com/airbytehq/airbyte/pull/30149) | Change config schema; set checkpointing interval; add suggested streams; add cating datetime fields. | -| 0.1.40 | 2022-09-20 | [30090](https://github.com/airbytehq/airbyte/pull/30090) | Handle 400 error when the credentials become expired | -| 0.1.39 | 2023-09-15 | [30469](https://github.com/airbytehq/airbyte/pull/30469) | Add default primary key `distinct_id` to `Export` stream | -| 0.1.38 | 2023-08-31 | [30028](https://github.com/airbytehq/airbyte/pull/30028) | Handle gracefully project timezone mismatch | -| 0.1.37 | 2023-07-20 | [27932](https://github.com/airbytehq/airbyte/pull/27932) | Fix spec: change start/end date format to `date` | -| 0.1.36 | 2023-06-27 | [27752](https://github.com/airbytehq/airbyte/pull/27752) | Partially revert version 0.1.32; Use exponential backoff; | -| 0.1.35 | 2023-06-12 | [27252](https://github.com/airbytehq/airbyte/pull/27252) | Add should_retry False for 402 error | -| 0.1.34 | 2023-05-15 | [21837](https://github.com/airbytehq/airbyte/pull/21837) | Add "insert_id" field to "export" stream schema | -| 0.1.33 | 2023-04-25 | [25543](https://github.com/airbytehq/airbyte/pull/25543) | Set should_retry for 104 error in stream export | -| 0.1.32 | 2023-04-11 | [25056](https://github.com/airbytehq/airbyte/pull/25056) | Set HttpAvailabilityStrategy, 
add exponential backoff, streams export and annotations add undeclared fields | -| 0.1.31 | 2023-02-13 | [22936](https://github.com/airbytehq/airbyte/pull/22936) | Specified date formatting in specification | -| 0.1.30 | 2023-01-27 | [22017](https://github.com/airbytehq/airbyte/pull/22017) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.1.29 | 2022-11-02 | [18846](https://github.com/airbytehq/airbyte/pull/18846) | For "export" stream make line parsing more robust | -| 0.1.28 | 2022-10-06 | [17699](https://github.com/airbytehq/airbyte/pull/17699) | Fix discover step issue cursor field None | -| 0.1.27 | 2022-09-29 | [17415](https://github.com/airbytehq/airbyte/pull/17415) | Disable stream "cohort_members" on discover if not access | -| 0.1.26 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream states. | -| 0.1.25 | 2022-09-27 | [17145](https://github.com/airbytehq/airbyte/pull/17145) | Disable streams "export", "engage" on discover if not access | -| 0.1.24 | 2022-09-26 | [16915](https://github.com/airbytehq/airbyte/pull/16915) | Added Service Accounts support | -| 0.1.23 | 2022-09-18 | [16843](https://github.com/airbytehq/airbyte/pull/16843) | Add stream=True for `export` stream | -| 0.1.22 | 2022-09-15 | [16770](https://github.com/airbytehq/airbyte/pull/16770) | Use "Retry-After" header for backoff | -| 0.1.21 | 2022-09-11 | [16191](https://github.com/airbytehq/airbyte/pull/16191) | Improved connector's input configuration validation | -| 0.1.20 | 2022-08-22 | [15091](https://github.com/airbytehq/airbyte/pull/15091) | Improve `export` stream cursor support | -| 0.1.19 | 2022-08-18 | [15739](https://github.com/airbytehq/airbyte/pull/15739) | Update `titile` and `description` for `Project Secret` field | -| 0.1.18 | 2022-07-21 | [14924](https://github.com/airbytehq/airbyte/pull/14924) | Remove `additionalProperties` field from schemas and specs | -| 0.1.17 | 2022-06-01 | 
[12801](https://github.com/airbytehq/airbyte/pull/13372) | Acceptance tests fix, fixing some bugs for beta release | -| 0.1.16 | 2022-05-30 | [12801](https://github.com/airbytehq/airbyte/pull/12801) | Add end_date parameter | -| 0.1.15 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy | -| 0.1.14 | 2022-05-02 | [11501](https://github.com/airbytehq/airbyte/pull/11501) | Improve incremental sync method to streams | -| 0.1.13 | 2022-04-27 | [12335](https://github.com/airbytehq/airbyte/pull/12335) | Adding fixtures to mock time.sleep for connectors that explicitly sleep | -| 0.1.12 | 2022-03-31 | [11633](https://github.com/airbytehq/airbyte/pull/11633) | Increase unit test coverage | -| 0.1.11 | 2022-04-04 | [11318](https://github.com/airbytehq/airbyte/pull/11318) | Change Response Reading | -| 0.1.10 | 2022-03-31 | [11227](https://github.com/airbytehq/airbyte/pull/11227) | Fix cohort id always null in the cohort_members stream | -| 0.1.9 | 2021-12-07 | [8429](https://github.com/airbytehq/airbyte/pull/8578) | Updated titles and descriptions | -| 0.1.7 | 2021-12-01 | [8381](https://github.com/airbytehq/airbyte/pull/8381) | Increased performance for `discovery` stage during connector setup | -| 0.1.6 | 2021-11-25 | [8256](https://github.com/airbytehq/airbyte/issues/8256) | Deleted `date_window_size` and fix schemas date type issue | -| 0.1.5 | 2021-11-10 | [7451](https://github.com/airbytehq/airbyte/issues/7451) | Support `start_date` older than 1 year | -| 0.1.4 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | -| 0.1.3 | 2021-10-30 | [7505](https://github.com/airbytehq/airbyte/issues/7505) | Guarantee that standard and custom mixpanel properties in the `Engage` stream are written as strings | -| 0.1.2 | 2021-11-02 | [7439](https://github.com/airbytehq/airbyte/issues/7439) | Added delay for all streams to match API limitation of requests rate | -| 0.1.1 | 
2021-09-16 | [6075](https://github.com/airbytehq/airbyte/issues/6075) | Added option to select project region | -| 0.1.0 | 2021-07-06 | [3698](https://github.com/airbytehq/airbyte/issues/3698) | Created CDK native mixpanel connector | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------------------------------| +| 1.0.0 | 2023-09-27 | [30025](https://github.com/airbytehq/airbyte/pull/30025) | Fix type of datetime field in engage stream; fix primary key for export stream. | +| 0.1.41 | 2023-09-26 | [30149](https://github.com/airbytehq/airbyte/pull/30149) | Change config schema; set checkpointing interval; add suggested streams; add casting datetime fields. | +| 0.1.40 | 2023-09-20 | [30090](https://github.com/airbytehq/airbyte/pull/30090) | Handle 400 error when the credentials become expired | +| 0.1.39 | 2023-09-15 | [30469](https://github.com/airbytehq/airbyte/pull/30469) | Add default primary key `distinct_id` to `Export` stream | +| 0.1.38 | 2023-08-31 | [30028](https://github.com/airbytehq/airbyte/pull/30028) | Handle gracefully project timezone mismatch | +| 0.1.37 | 2023-07-20 | [27932](https://github.com/airbytehq/airbyte/pull/27932) | Fix spec: change start/end date format to `date` | +| 0.1.36 | 2023-06-27 | [27752](https://github.com/airbytehq/airbyte/pull/27752) | Partially revert version 0.1.32; Use exponential backoff; | +| 0.1.35 | 2023-06-12 | [27252](https://github.com/airbytehq/airbyte/pull/27252) | Add should_retry False for 402 error | +| 0.1.34 | 2023-05-15 | [21837](https://github.com/airbytehq/airbyte/pull/21837) | Add "insert_id" field to "export" stream schema | +| 0.1.33 | 2023-04-25 | [25543](https://github.com/airbytehq/airbyte/pull/25543) | Set should_retry for 104 error in stream export | +| 0.1.32 | 2023-04-11 | [25056](https://github.com/airbytehq/airbyte/pull/25056) | Set 
HttpAvailabilityStrategy, add exponential backoff, streams export and annotations add undeclared fields | +| 0.1.31 | 2023-02-13 | [22936](https://github.com/airbytehq/airbyte/pull/22936) | Specified date formatting in specification | +| 0.1.30 | 2023-01-27 | [22017](https://github.com/airbytehq/airbyte/pull/22017) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.1.29 | 2022-11-02 | [18846](https://github.com/airbytehq/airbyte/pull/18846) | For "export" stream make line parsing more robust | +| 0.1.28 | 2022-10-06 | [17699](https://github.com/airbytehq/airbyte/pull/17699) | Fix discover step issue cursor field None | +| 0.1.27 | 2022-09-29 | [17415](https://github.com/airbytehq/airbyte/pull/17415) | Disable stream "cohort_members" on discover if not access | +| 0.1.26 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream states. | +| 0.1.25 | 2022-09-27 | [17145](https://github.com/airbytehq/airbyte/pull/17145) | Disable streams "export", "engage" on discover if not access | +| 0.1.24 | 2022-09-26 | [16915](https://github.com/airbytehq/airbyte/pull/16915) | Added Service Accounts support | +| 0.1.23 | 2022-09-18 | [16843](https://github.com/airbytehq/airbyte/pull/16843) | Add stream=True for `export` stream | +| 0.1.22 | 2022-09-15 | [16770](https://github.com/airbytehq/airbyte/pull/16770) | Use "Retry-After" header for backoff | +| 0.1.21 | 2022-09-11 | [16191](https://github.com/airbytehq/airbyte/pull/16191) | Improved connector's input configuration validation | +| 0.1.20 | 2022-08-22 | [15091](https://github.com/airbytehq/airbyte/pull/15091) | Improve `export` stream cursor support | +| 0.1.19 | 2022-08-18 | [15739](https://github.com/airbytehq/airbyte/pull/15739) | Update `title` and `description` for `Project Secret` field | +| 0.1.18 | 2022-07-21 | [14924](https://github.com/airbytehq/airbyte/pull/14924) | Remove `additionalProperties` field from schemas and specs | +| 0.1.17 | 2022-06-01 | 
[12801](https://github.com/airbytehq/airbyte/pull/13372) | Acceptance tests fix, fixing some bugs for beta release | +| 0.1.16 | 2022-05-30 | [12801](https://github.com/airbytehq/airbyte/pull/12801) | Add end_date parameter | +| 0.1.15 | 2022-05-04 | [12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy | +| 0.1.14 | 2022-05-02 | [11501](https://github.com/airbytehq/airbyte/pull/11501) | Improve incremental sync method to streams | +| 0.1.13 | 2022-04-27 | [12335](https://github.com/airbytehq/airbyte/pull/12335) | Adding fixtures to mock time.sleep for connectors that explicitly sleep | +| 0.1.12 | 2022-03-31 | [11633](https://github.com/airbytehq/airbyte/pull/11633) | Increase unit test coverage | +| 0.1.11 | 2022-04-04 | [11318](https://github.com/airbytehq/airbyte/pull/11318) | Change Response Reading | +| 0.1.10 | 2022-03-31 | [11227](https://github.com/airbytehq/airbyte/pull/11227) | Fix cohort id always null in the cohort_members stream | +| 0.1.9 | 2021-12-07 | [8429](https://github.com/airbytehq/airbyte/pull/8578) | Updated titles and descriptions | +| 0.1.7 | 2021-12-01 | [8381](https://github.com/airbytehq/airbyte/pull/8381) | Increased performance for `discovery` stage during connector setup | +| 0.1.6 | 2021-11-25 | [8256](https://github.com/airbytehq/airbyte/issues/8256) | Deleted `date_window_size` and fix schemas date type issue | +| 0.1.5 | 2021-11-10 | [7451](https://github.com/airbytehq/airbyte/issues/7451) | Support `start_date` older than 1 year | +| 0.1.4 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | +| 0.1.3 | 2021-10-30 | [7505](https://github.com/airbytehq/airbyte/issues/7505) | Guarantee that standard and custom mixpanel properties in the `Engage` stream are written as strings | +| 0.1.2 | 2021-11-02 | [7439](https://github.com/airbytehq/airbyte/issues/7439) | Added delay for all streams to match API limitation of requests rate | +| 0.1.1 | 
2021-09-16 | [6075](https://github.com/airbytehq/airbyte/issues/6075) | Added option to select project region | +| 0.1.0 | 2021-07-06 | [3698](https://github.com/airbytehq/airbyte/issues/3698) | Created CDK native mixpanel connector |