diff --git a/airbyte-integrations/connectors/source-s3/Dockerfile b/airbyte-integrations/connectors/source-s3/Dockerfile
index cf94ff45b91f..c5ae0b44d617 100644
--- a/airbyte-integrations/connectors/source-s3/Dockerfile
+++ b/airbyte-integrations/connectors/source-s3/Dockerfile
@@ -17,5 +17,5 @@ COPY source_s3 ./source_s3
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
-LABEL io.airbyte.version=4.0.3
+LABEL io.airbyte.version=4.0.4
LABEL io.airbyte.name=airbyte/source-s3
diff --git a/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml b/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml
index 9d89593e7fc3..8dec5340d77b 100644
--- a/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml
+++ b/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml
@@ -110,37 +110,37 @@ acceptance_tests:
tests:
- config_path: secrets/config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_csv_custom_encoding_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_csv_custom_format_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_csv_user_schema_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_csv_no_header_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_csv_skip_rows_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_csv_with_nulls_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_parquet_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_avro_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_jsonl_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
- config_path: secrets/v4_jsonl_newlines_config.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
full_refresh:
tests:
- config_path: secrets/config.json
@@ -190,6 +190,6 @@ acceptance_tests:
tests:
- spec_path: integration_tests/spec.json
backward_compatibility_tests_config:
- disable_for_version: "3.1.11" # Switch to v4 changed config shape
+ disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format`
connector_image: airbyte/source-s3:dev
test_strictness_level: high
diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json
index 6f4f5934dd7b..b6c59730c571 100644
--- a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json
+++ b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json
@@ -29,11 +29,6 @@
"description": "The name of the stream.",
"type": "string"
},
- "file_type": {
- "title": "File Type",
- "description": "The data file type that is being extracted for a stream.",
- "type": "string"
- },
"globs": {
"title": "Globs",
"description": "The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.",
@@ -283,7 +278,7 @@
"type": "boolean"
}
},
- "required": ["name", "file_type"]
+ "required": ["name", "format"]
}
},
"bucket": {
diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml
index 0db0c8596a60..5aab398e589b 100644
--- a/airbyte-integrations/connectors/source-s3/metadata.yaml
+++ b/airbyte-integrations/connectors/source-s3/metadata.yaml
@@ -5,7 +5,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2
- dockerImageTag: 4.0.3
+ dockerImageTag: 4.0.4
dockerRepository: airbyte/source-s3
githubIssueLabel: source-s3
icon: s3.svg
@@ -25,6 +25,9 @@ data:
4.0.0:
message: "UX improvement, multi-stream support and deprecation of some parsing features"
upgradeDeadline: "2023-10-05"
+ 4.0.4:
+ message: "Following 4.0.0 config change, we are eliminating the `streams.*.file_type` field which was redundant with `streams.*.format`"
+ upgradeDeadline: "2023-10-18"
ab_internal:
sl: 300
ql: 400
diff --git a/airbyte-integrations/connectors/source-s3/setup.py b/airbyte-integrations/connectors/source-s3/setup.py
index 592e4faf0ad3..3b0712809155 100644
--- a/airbyte-integrations/connectors/source-s3/setup.py
+++ b/airbyte-integrations/connectors/source-s3/setup.py
@@ -6,7 +6,7 @@
from setuptools import find_packages, setup
MAIN_REQUIREMENTS = [
- "airbyte-cdk>=0.51.14",
+ "airbyte-cdk>=0.51.17",
"pyarrow==12.0.1",
"smart-open[s3]==5.1.0",
"wcmatch==8.4",
diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py
index b2f8c52f1ffc..4d04411a6694 100644
--- a/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py
+++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/legacy_config_transformer.py
@@ -29,7 +29,6 @@ def convert(cls, legacy_config: SourceS3Spec) -> Mapping[str, Any]:
"streams": [
{
"name": legacy_config.dataset,
- "file_type": legacy_config.format.filetype,
"globs": cls._create_globs(legacy_config.path_pattern),
"legacy_prefix": legacy_config.provider.path_prefix,
"validation_policy": "Emit Record",
diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py
index 7e612ba89e64..d81616038277 100644
--- a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py
+++ b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_cursor.py
@@ -7,6 +7,7 @@
from unittest.mock import Mock
import pytest
+from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
@@ -486,7 +487,7 @@ def test_get_adjusted_date_timestamp(cursor_datetime, file_datetime, expected_ad
def _init_cursor_with_state(input_state, max_history_size: Optional[int] = None) -> Cursor:
- cursor = Cursor(stream_config=FileBasedStreamConfig(file_type="csv", name="test", validation_policy="Emit Record"))
+ cursor = Cursor(stream_config=FileBasedStreamConfig(name="test", validation_policy="Emit Record", format=CsvFormat()))
cursor.set_initial_state(input_state)
if max_history_size is not None:
cursor.DEFAULT_MAX_HISTORY_SIZE = max_history_size
diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py
index 52d883a84006..e21176dcd951 100644
--- a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py
+++ b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_legacy_config_transformer.py
@@ -37,7 +37,6 @@
"streams": [
{
"name": "test_data",
- "file_type": "avro",
"globs": ["**/*.avro"],
"legacy_prefix": "a_folder/",
"validation_policy": "Emit Record",
@@ -65,7 +64,6 @@
"streams": [
{
"name": "test_data",
- "file_type": "avro",
"globs": ["**/*.avro"],
"legacy_prefix": "",
"validation_policy": "Emit Record",
@@ -93,7 +91,6 @@
"streams": [
{
"name": "test_data",
- "file_type": "avro",
"globs": ["*.csv", "**/*"],
"validation_policy": "Emit Record",
"legacy_prefix": "a_prefix/",
@@ -393,7 +390,6 @@ def test_convert_file_format(file_type, legacy_format_config, expected_format_co
"streams": [
{
"name": "test_data",
- "file_type": file_type,
"globs": [f"**/*.{file_type}"],
"legacy_prefix": "",
"validation_policy": "Emit Record",
diff --git a/docs/integrations/sources/s3-migrations.md b/docs/integrations/sources/s3-migrations.md
index 11e2b083cec3..18e6bdb11947 100644
--- a/docs/integrations/sources/s3-migrations.md
+++ b/docs/integrations/sources/s3-migrations.md
@@ -1,5 +1,12 @@
# S3 Migration Guide
+## Upgrading to 4.0.4
+
+Note: This change is only breaking if you created S3 sources using the API and did not provide `streams.*.format`.
+
+Following the 4.0.0 config change, we are removing the `streams.*.file_type` field, which was redundant with `streams.*.format`. This is a breaking change because `format` is now required. Given that the UI always populates `format`, only users who created actors using the API without providing `format` are affected. To fix this, set `streams.*.format` to `{"filetype": "<file_type>"}`, where `<file_type>` is the value previously set in `streams.*.file_type` (for example, `{"filetype": "csv"}`).
+
+
## Upgrading to 4.0.0
We have revamped the implementation to use the File-Based CDK. The goal is to increase resiliency and reduce development time. Here are the breaking changes:
@@ -18,3 +25,4 @@ Other than breaking changes, we have changed the UI from which the user configur
* You can now configure multiple streams by clicking on `Add` under `Streams`.
* `Output Stream Name` has been renamed to `Name` when configuring a specific stream.
* `Pattern of files to replicate` field has been renamed `Globs` under the stream configuration.
+
diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md
index a3de6ceaca35..6999631b43e6 100644
--- a/docs/integrations/sources/s3.md
+++ b/docs/integrations/sources/s3.md
@@ -236,9 +236,10 @@ There are currently no options for JSONL parsing.
## Changelog
| Version | Date | Pull Request | Subject |
-|:--------|:-----------| :-------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------- |
-| 4.0.3 | 2023-09-13 | [30387](https://github.com/airbytehq/airbyte/pull/30387) | Bump Airbyte-CDK version to improve messages for record parse errors
-| 4.0.2 | 2023-09-07 | [28639](https://github.com/airbytehq/airbyte/pull/28639) | Always show S3 Key fields
+|:--------|:-----------| :-------------------------------------------------------------------------------------------------------------- |:---------------------------------------------------------------------------------------------------------------------|
+| 4.0.4 | 2023-09-18 | [30476](https://github.com/airbytehq/airbyte/pull/30476) | Remove streams.*.file_type from source-s3 configuration |
+| 4.0.3 | 2023-09-13 | [30387](https://github.com/airbytehq/airbyte/pull/30387) | Bump Airbyte-CDK version to improve messages for record parse errors |
+| 4.0.2 | 2023-09-07 | [28639](https://github.com/airbytehq/airbyte/pull/28639) | Always show S3 Key fields |
| 4.0.1 | 2023-09-06 | [30217](https://github.com/airbytehq/airbyte/pull/30217) | Migrate inference error to config errors and avoir sentry alerts |
| 4.0.0 | 2023-09-05 | [29757](https://github.com/airbytehq/airbyte/pull/29757) | New version using file-based CDK |
| 3.1.11 | 2023-08-30 | [29986](https://github.com/airbytehq/airbyte/pull/29986) | Add config error for conversion error |