diff --git a/airbyte-integrations/connectors/source-s3/Dockerfile b/airbyte-integrations/connectors/source-s3/Dockerfile index cf94ff45b91f..c5ae0b44d617 100644 --- a/airbyte-integrations/connectors/source-s3/Dockerfile +++ b/airbyte-integrations/connectors/source-s3/Dockerfile @@ -17,5 +17,5 @@ COPY source_s3 ./source_s3 ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=4.0.3 +LABEL io.airbyte.version=4.0.4 LABEL io.airbyte.name=airbyte/source-s3 diff --git a/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml b/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml index 9d89593e7fc3..8dec5340d77b 100644 --- a/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml @@ -110,37 +110,37 @@ acceptance_tests: tests: - config_path: secrets/config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_csv_custom_encoding_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_csv_custom_format_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_csv_user_schema_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which 
was redundant with `streams.*.format` - config_path: secrets/v4_csv_no_header_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_csv_skip_rows_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_csv_with_nulls_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_parquet_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_avro_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_jsonl_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` - config_path: secrets/v4_jsonl_newlines_config.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` full_refresh: tests: - config_path: secrets/config.json 
@@ -190,6 +190,6 @@ acceptance_tests: tests: - spec_path: integration_tests/spec.json backward_compatibility_tests_config: - disable_for_version: "3.1.11" # Switch to v4 changed config shape + disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` connector_image: airbyte/source-s3:dev test_strictness_level: high diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json index 6f4f5934dd7b..b6c59730c571 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json @@ -29,11 +29,6 @@ "description": "The name of the stream.", "type": "string" }, - "file_type": { - "title": "File Type", - "description": "The data file type that is being extracted for a stream.", - "type": "string" - }, "globs": { "title": "Globs", "description": "The pattern used to specify which files should be selected from the file system. 
For more information on glob pattern matching look here.", @@ -283,7 +278,7 @@ "type": "boolean" } }, - "required": ["name", "file_type"] + "required": ["name", "format"] } }, "bucket": { diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml index 0db0c8596a60..5aab398e589b 100644 --- a/airbyte-integrations/connectors/source-s3/metadata.yaml +++ b/airbyte-integrations/connectors/source-s3/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: file connectorType: source definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 - dockerImageTag: 4.0.3 + dockerImageTag: 4.0.4 dockerRepository: airbyte/source-s3 githubIssueLabel: source-s3 icon: s3.svg @@ -25,6 +25,9 @@ data: 4.0.0: message: "UX improvement, multi-stream support and deprecation of some parsing features" upgradeDeadline: "2023-10-05" + 4.0.4: + message: "Following 4.0.0 config change, we are eliminating the `streams.*.file_type` field which was redundant with `streams.*.format`" + upgradeDeadline: "2023-10-18" ab_internal: sl: 300 ql: 400 diff --git a/airbyte-integrations/connectors/source-s3/setup.py b/airbyte-integrations/connectors/source-s3/setup.py index 592e4faf0ad3..3b0712809155 100644 --- a/airbyte-integrations/connectors/source-s3/setup.py +++ b/airbyte-integrations/connectors/source-s3/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk>=0.51.14", + "airbyte-cdk>=0.51.17", "pyarrow==12.0.1", "smart-open[s3]==5.1.0", "wcmatch==8.4", diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py index b2f8c52f1ffc..4d04411a6694 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py @@ -29,7 +29,6 @@ def convert(cls, legacy_config: 
SourceS3Spec) -> Mapping[str, Any]: "streams": [ { "name": legacy_config.dataset, - "file_type": legacy_config.format.filetype, "globs": cls._create_globs(legacy_config.path_pattern), "legacy_prefix": legacy_config.provider.path_prefix, "validation_policy": "Emit Record", diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py index 7e612ba89e64..d81616038277 100644 --- a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py +++ b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py @@ -7,6 +7,7 @@ from unittest.mock import Mock import pytest +from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor @@ -486,7 +487,7 @@ def test_get_adjusted_date_timestamp(cursor_datetime, file_datetime, expected_ad def _init_cursor_with_state(input_state, max_history_size: Optional[int] = None) -> Cursor: - cursor = Cursor(stream_config=FileBasedStreamConfig(file_type="csv", name="test", validation_policy="Emit Record")) + cursor = Cursor(stream_config=FileBasedStreamConfig(name="test", validation_policy="Emit Record", format=CsvFormat())) cursor.set_initial_state(input_state) if max_history_size is not None: cursor.DEFAULT_MAX_HISTORY_SIZE = max_history_size diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py index 52d883a84006..e21176dcd951 100644 --- a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py +++ 
b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py @@ -37,7 +37,6 @@ "streams": [ { "name": "test_data", - "file_type": "avro", "globs": ["**/*.avro"], "legacy_prefix": "a_folder/", "validation_policy": "Emit Record", @@ -65,7 +64,6 @@ "streams": [ { "name": "test_data", - "file_type": "avro", "globs": ["**/*.avro"], "legacy_prefix": "", "validation_policy": "Emit Record", @@ -93,7 +91,6 @@ "streams": [ { "name": "test_data", - "file_type": "avro", "globs": ["*.csv", "**/*"], "validation_policy": "Emit Record", "legacy_prefix": "a_prefix/", @@ -393,7 +390,6 @@ def test_convert_file_format(file_type, legacy_format_config, expected_format_co "streams": [ { "name": "test_data", - "file_type": file_type, "globs": [f"**/*.{file_type}"], "legacy_prefix": "", "validation_policy": "Emit Record", diff --git a/docs/integrations/sources/s3-migrations.md b/docs/integrations/sources/s3-migrations.md index 11e2b083cec3..18e6bdb11947 100644 --- a/docs/integrations/sources/s3-migrations.md +++ b/docs/integrations/sources/s3-migrations.md @@ -1,5 +1,12 @@ # S3 Migration Guide +## Upgrading to 4.0.4 + +Note: This change is only breaking if you created S3 sources using the API and did not provide `streams.*.format`. + +Following the 4.0.0 config change, we are removing the `streams.*.file_type` field which was redundant with `streams.*.format`. This is a breaking change as `format` is now required. Given that the UI would always populate `format`, only users creating actors using the API and not providing `format` are affected. In order to fix that, simply set `streams.*.format` to `{"filetype": "<file type>"}`. + + ## Upgrading to 4.0.0 We have revamped the implementation to use the File-Based CDK. The goal is to increase resiliency and reduce development time. 
Here are the breaking changes: @@ -18,3 +25,4 @@ Other than breaking changes, we have changed the UI from which the user configur * You can now configure multiple streams by clicking on `Add` under `Streams`. * `Output Stream Name` has been renamed to `Name` when configuring a specific stream. * `Pattern of files to replicate` field has been renamed `Globs` under the stream configuration. + diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index a3de6ceaca35..6999631b43e6 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -236,9 +236,10 @@ There are currently no options for JSONL parsing. ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------| :-------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------- | -| 4.0.3 | 2023-09-13 | [30387](https://github.com/airbytehq/airbyte/pull/30387) | Bump Airbyte-CDK version to improve messages for record parse errors -| 4.0.2 | 2023-09-07 | [28639](https://github.com/airbytehq/airbyte/pull/28639) | Always show S3 Key fields +|:--------|:-----------| :-------------------------------------------------------------------------------------------------------------- |:---------------------------------------------------------------------------------------------------------------------| +| 4.0.4 | 2023-09-18 | [30476](https://github.com/airbytehq/airbyte/pull/30476) | Remove streams.*.file_type from source-s3 configuration | +| 4.0.3 | 2023-09-13 | [30387](https://github.com/airbytehq/airbyte/pull/30387) | Bump Airbyte-CDK version to improve messages for record parse errors | +| 4.0.2 | 2023-09-07 | [28639](https://github.com/airbytehq/airbyte/pull/28639) | Always show S3 Key fields | | 4.0.1 | 2023-09-06 | [30217](https://github.com/airbytehq/airbyte/pull/30217) | Migrate 
inference error to config errors and avoir sentry alerts | | 4.0.0 | 2023-09-05 | [29757](https://github.com/airbytehq/airbyte/pull/29757) | New version using file-based CDK | | 3.1.11 | 2023-08-30 | [29986](https://github.com/airbytehq/airbyte/pull/29986) | Add config error for conversion error |