diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile index fa3b8daf6d7e..7b405c1107b8 100644 --- a/airbyte-integrations/connectors/source-file-secure/Dockerfile +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -6,7 +6,7 @@ # If you need to add a custom logic to build your connector image, you can do it by adding a finalize_build.sh or finalize_build.py script in the connector folder. # Please reach out to the Connectors Operations team if you have any question. -FROM airbyte/source-file:0.3.11 +FROM airbyte/source-file:0.3.12 WORKDIR /airbyte/integration_code COPY source_file_secure ./source_file_secure @@ -18,5 +18,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.3.11 +LABEL io.airbyte.version=0.3.12 LABEL io.airbyte.name=airbyte/source-file-secure diff --git a/airbyte-integrations/connectors/source-file-secure/metadata.yaml b/airbyte-integrations/connectors/source-file-secure/metadata.yaml index 9bccb5d2ef88..68943185eaba 100644 --- a/airbyte-integrations/connectors/source-file-secure/metadata.yaml +++ b/airbyte-integrations/connectors/source-file-secure/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: file connectorType: source definitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77 - dockerImageTag: 0.3.11 + dockerImageTag: 0.3.12 dockerRepository: airbyte/source-file-secure githubIssueLabel: source-file icon: file.svg diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile index 5755c3e6505d..197c211c4a35 100644 --- a/airbyte-integrations/connectors/source-file/Dockerfile +++ b/airbyte-integrations/connectors/source-file/Dockerfile @@ -17,5 +17,5 @@ COPY source_file ./source_file ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.3.11 +LABEL io.airbyte.version=0.3.12 LABEL io.airbyte.name=airbyte/source-file diff --git a/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py b/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py index bdf0a835d2a5..2f9b195df3c7 100644 --- a/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py +++ b/airbyte-integrations/connectors/source-file/integration_tests/file_formats_test.py @@ -7,8 +7,9 @@ import pytest from airbyte_cdk import AirbyteLogger +from airbyte_cdk.utils import AirbyteTracedException from source_file import SourceFile -from source_file.client import Client, ConfigurationError +from source_file.client import Client SAMPLE_DIRECTORY = Path(__file__).resolve().parent.joinpath("sample_files/formats") @@ -59,7 +60,7 @@ def test_raises_file_wrong_format(file_format, extension, wrong_format, filename file_path = str(file_directory.joinpath(f"{filename}.{extension}")) configs = {"dataset_name": "test", "format": wrong_format, "url": file_path, "provider": {"storage": "local"}} client = Client(**configs) - with pytest.raises((TypeError, ValueError, ConfigurationError)): + with pytest.raises((TypeError, ValueError, AirbyteTracedException)): list(client.read()) diff --git a/airbyte-integrations/connectors/source-file/metadata.yaml b/airbyte-integrations/connectors/source-file/metadata.yaml index a6a868e7b8c3..8fc293d70db7 100644 --- a/airbyte-integrations/connectors/source-file/metadata.yaml +++ b/airbyte-integrations/connectors/source-file/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: file connectorType: source definitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77 - dockerImageTag: 0.3.11 + dockerImageTag: 0.3.12 dockerRepository: airbyte/source-file githubIssueLabel: source-file icon: file.svg @@ -14,7 +14,7 @@ data: registries: cloud: dockerRepository: airbyte/source-file-secure - dockerImageTag: 0.3.11 # Dont forget to publish source-file-secure as well when updating this. + dockerImageTag: 0.3.12 # Dont forget to publish source-file-secure as well when updating this. enabled: true oss: enabled: true diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py index 1c6dd8cb7c0a..5cf261f5978a 100644 --- a/airbyte-integrations/connectors/source-file/source_file/client.py +++ b/airbyte-integrations/connectors/source-file/source_file/client.py @@ -44,14 +44,6 @@ logging.getLogger("smart_open").setLevel(logging.ERROR) -class ConfigurationError(Exception): - """Client mis-configured""" - - -class PermissionsError(Exception): - """User don't have enough permissions""" - - class URLFile: """Class to manage read from file located at different providers @@ -211,7 +203,7 @@ def _open_gcs_url(self) -> object: except json.decoder.JSONDecodeError as err: error_msg = f"Failed to parse gcs service account json: {repr(err)}" logger.error(f"{error_msg}\n{traceback.format_exc()}") - raise ConfigurationError(error_msg) from err + raise AirbyteTracedException(message=error_msg, internal_message=error_msg, failure_type=FailureType.config_error) from err if credentials: credentials = service_account.Credentials.from_service_account_info(credentials) @@ -341,7 +333,7 @@ def load_dataframes(self, fp, skip_data=False, read_sample_chunk: bool = False) except KeyError as err: error_msg = f"Reader {self._reader_format} is not supported." logger.error(f"{error_msg}\n{traceback.format_exc()}") - raise ConfigurationError(error_msg) from err + raise AirbyteTracedException(message=error_msg, internal_message=error_msg, failure_type=FailureType.config_error) from err reader_options = {**self._reader_options} try: @@ -367,13 +359,17 @@ def load_dataframes(self, fp, skip_data=False, read_sample_chunk: bool = False) yield reader(fp, **reader_options) else: yield reader(fp, **reader_options) + except ParserError as err: + error_msg = f"File {fp} can not be parsed. Please check your reader_options. https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html" + logger.error(f"{error_msg}\n{traceback.format_exc()}") + raise AirbyteTracedException(message=error_msg, internal_message=error_msg, failure_type=FailureType.config_error) from err except UnicodeDecodeError as err: error_msg = ( f"File {fp} can't be parsed with reader of chosen type ({self._reader_format}). " f"Please check provided Format and Reader Options. {repr(err)}." ) logger.error(f"{error_msg}\n{traceback.format_exc()}") - raise ConfigurationError(error_msg) from err + raise AirbyteTracedException(message=error_msg, internal_message=error_msg, failure_type=FailureType.config_error) from err @staticmethod def dtype_to_json_type(current_type: str, dtype) -> str: @@ -430,11 +426,7 @@ def read(self, fields: Iterable = None) -> Iterable[dict]: f"File {fp} can not be opened due to connection issues on provider side. Please check provided links and options" ) logger.error(f"{error_msg}\n{traceback.format_exc()}") - raise ConfigurationError(error_msg) from err - except ParserError as err: - error_msg = f"File {fp} can not be parsed. Please check your reader_options. https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html" - logger.error(f"{error_msg}\n{traceback.format_exc()}") - raise ConfigurationError(error_msg) from err + raise AirbyteTracedException(message=error_msg, internal_message=error_msg, failure_type=FailureType.config_error) from err def _cache_stream(self, fp): """cache stream to file""" diff --git a/airbyte-integrations/connectors/source-file/source_file/source.py b/airbyte-integrations/connectors/source-file/source_file/source.py index aa8af0a4af6f..59d5a966f6f1 100644 --- a/airbyte-integrations/connectors/source-file/source_file/source.py +++ b/airbyte-integrations/connectors/source-file/source_file/source.py @@ -17,12 +17,14 @@ AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog, + FailureType, Status, Type, ) from airbyte_cdk.sources import Source +from airbyte_cdk.utils import AirbyteTracedException -from .client import Client, ConfigurationError +from .client import Client from .utils import dropbox_force_download @@ -85,19 +87,22 @@ def _validate_and_transform(config: Mapping[str, Any]): try: config["reader_options"] = json.loads(config["reader_options"]) if not isinstance(config["reader_options"], dict): - raise ConfigurationError( + message = ( "Field 'reader_options' is not a valid JSON object. " "Please provide key-value pairs, See field description for examples." ) + raise AirbyteTracedException(message=message, internal_message=message, failure_type=FailureType.config_error) except ValueError: - raise ConfigurationError("Field 'reader_options' is not valid JSON object. https://www.json.org/") + message = "Field 'reader_options' is not valid JSON object. https://www.json.org/" + raise AirbyteTracedException(message=message, internal_message=message, failure_type=FailureType.config_error) else: config["reader_options"] = {} config["url"] = dropbox_force_download(config["url"]) parse_result = urlparse(config["url"]) if parse_result.netloc == "docs.google.com" and parse_result.path.lower().startswith("/spreadsheets/"): - raise ConfigurationError(f'Failed to load {config["url"]}: please use the Official Google Sheets Source connector') + message = f'Failed to load {config["url"]}: please use the Official Google Sheets Source connector' + raise AirbyteTracedException(message=message, internal_message=message, failure_type=FailureType.config_error) return config def check(self, logger, config: Mapping) -> AirbyteConnectionStatus: @@ -105,21 +110,16 @@ def check(self, logger, config: Mapping) -> AirbyteConnectionStatus: Check involves verifying that the specified file is reachable with our credentials. """ - try: - config = self._validate_and_transform(config) - except ConfigurationError as e: - logger.error(str(e)) - return AirbyteConnectionStatus(status=Status.FAILED, message=str(e)) - + config = self._validate_and_transform(config) client = self._get_client(config) source_url = client.reader.full_url try: list(client.streams(empty_schema=True)) return AirbyteConnectionStatus(status=Status.SUCCEEDED) - except (TypeError, ValueError, ConfigurationError) as err: + except (TypeError, ValueError, AirbyteTracedException) as err: reason = f"Failed to load {source_url}. Please check File Format and Reader Options are set correctly." logger.error(f"{reason}\n{repr(err)}") - return AirbyteConnectionStatus(status=Status.FAILED, message=reason) + raise AirbyteTracedException(message=reason, internal_message=reason, failure_type=FailureType.config_error) except Exception as err: reason = f"Failed to load {source_url}. You could have provided an invalid URL, please verify it: {repr(err)}." logger.error(reason) diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_client.py b/airbyte-integrations/connectors/source-file/unit_tests/test_client.py index 4f23d96bd885..6eb9a11a3242 100644 --- a/airbyte-integrations/connectors/source-file/unit_tests/test_client.py +++ b/airbyte-integrations/connectors/source-file/unit_tests/test_client.py @@ -6,9 +6,10 @@ from unittest.mock import patch, sentinel import pytest +from airbyte_cdk.utils import AirbyteTracedException from pandas import read_csv, read_excel from paramiko import SSHException -from source_file.client import Client, ConfigurationError, URLFile +from source_file.client import Client, URLFile from urllib3.exceptions import ProtocolError @@ -57,7 +58,7 @@ def test_load_dataframes(client, wrong_format_client, absolute_path, test_files) expected = read_csv(f) assert read_file.equals(expected) - with pytest.raises(ConfigurationError): + with pytest.raises(AirbyteTracedException): next(wrong_format_client.load_dataframes(fp=f)) with pytest.raises(StopIteration): @@ -66,7 +67,7 @@ def test_load_dataframes(client, wrong_format_client, absolute_path, test_files) def test_raises_configuration_error_with_incorrect_file_type(csv_format_client, absolute_path, test_files): f = f"{absolute_path}/{test_files}/archive_with_test_xlsx.zip" - with pytest.raises(ConfigurationError): + with pytest.raises(AirbyteTracedException): next(csv_format_client.load_dataframes(fp=f)) @@ -139,7 +140,7 @@ def test_open_gcs_url(): assert URLFile(url="", provider=provider)._open_gcs_url() provider.update({"service_account_json": '{service_account_json": "service_account_json"}'}) - with pytest.raises(ConfigurationError): + with pytest.raises(AirbyteTracedException): assert URLFile(url="", provider=provider)._open_gcs_url() @@ -158,7 +159,7 @@ def test_read_network_issues(test_read_config): test_read_config.update(format='excel') client = Client(**test_read_config) client.sleep_on_retry_sec = 0 # just for test - with patch.object(client, "_cache_stream", side_effect=ProtocolError), pytest.raises(ConfigurationError): + with patch.object(client, "_cache_stream", side_effect=ProtocolError), pytest.raises(AirbyteTracedException): next(client.read(["date", "key"])) diff --git a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py index b1704e6673d9..e43080625bbe 100644 --- a/airbyte-integrations/connectors/source-file/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-file/unit_tests/test_source.py @@ -5,7 +5,6 @@ import json import logging from copy import deepcopy -from unittest.mock import PropertyMock import jsonschema import pytest @@ -21,7 +20,7 @@ SyncMode, Type, ) -from source_file.client import ConfigurationError +from airbyte_cdk.utils import AirbyteTracedException from source_file.source import SourceFile logger = logging.getLogger("airbyte") @@ -133,9 +132,8 @@ def test_check_invalid_config(source, invalid_config): def test_check_invalid_reader_options(source, invalid_reader_options_config): - expected = AirbyteConnectionStatus(status=Status.FAILED) - actual = source.check(logger=logger, config=invalid_reader_options_config) - assert actual.status == expected.status + with pytest.raises(AirbyteTracedException, match="Field 'reader_options' is not a valid JSON object. Please provide key-value pairs"): + source.check(logger=logger, config=invalid_reader_options_config) def test_discover_dropbox_link(source, config_dropbox_link): @@ -149,25 +147,17 @@ def test_discover(source, config, client): for schema in schemas: jsonschema.Draft7Validator.check_schema(schema) - type(client).streams = PropertyMock(side_effect=Exception) - - with pytest.raises(Exception): - source.discover(logger=logger, config=config) - def test_check_wrong_reader_options(source, config): config["reader_options"] = '{encoding":"utf_16"}' - assert source.check(logger=logger, config=config) == AirbyteConnectionStatus( - status=Status.FAILED, message="Field 'reader_options' is not valid JSON object. https://www.json.org/" - ) + with pytest.raises(AirbyteTracedException, match="Field 'reader_options' is not valid JSON object. https://www.json.org/"): + source.check(logger=logger, config=config) def test_check_google_spreadsheets_url(source, config): config["url"] = "https://docs.google.com/spreadsheets/d/" - assert source.check(logger=logger, config=config) == AirbyteConnectionStatus( - status=Status.FAILED, - message="Failed to load https://docs.google.com/spreadsheets/d/: please use the Official Google Sheets Source connector", - ) + with pytest.raises(AirbyteTracedException, match="Failed to load https://docs.google.com/spreadsheets/d/: please use the Official Google Sheets Source connector"): + source.check(logger=logger, config=config) def test_pandas_header_not_none(absolute_path, test_files): @@ -218,9 +208,11 @@ def test_incorrect_reader_options(absolute_path, test_files): "provider": {"storage": "local"}, } - catalog = get_catalog({"0": {"type": ["string", "null"]}, "1": {"type": ["string", "null"]}}) source = SourceFile() - with pytest.raises(ConfigurationError) as e: + with pytest.raises(AirbyteTracedException, match="can not be parsed. Please check your reader_options. https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html"): + _ = source.discover(logger=logger, config=deepcopy(config)) + + with pytest.raises(AirbyteTracedException, match="can not be parsed. Please check your reader_options. https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html"): + catalog = get_catalog({"0": {"type": ["string", "null"]}, "1": {"type": ["string", "null"]}}) records = source.read(logger=logger, config=deepcopy(config), catalog=catalog) records = [r.record.data for r in records] - assert "can not be parsed. Please check your reader_options. https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html" in str(e.value) diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md index 99ee0add09bd..cc8be7e7ca84 100644 --- a/docs/integrations/sources/file.md +++ b/docs/integrations/sources/file.md @@ -214,59 +214,60 @@ In order to read large files from a remote location, this connector uses the [sm ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------|:--------------------------------------------------------------------------------------------------------| -| 0.3.11 | 2023-06-08 | [27157](https://github.com/airbytehq/airbyte/pull/27157) | Force smart open log level to ERROR | -| 0.3.10 | 2023-06-07 | [27107](https://github.com/airbytehq/airbyte/pull/27107) | Make source-file testable in our new airbyte-ci pipelines | -| 0.3.9 | 2023-05-18 | [26275](https://github.com/airbytehq/airbyte/pull/26275) | Add ParserError handling | -| 0.3.8 | 2023-05-17 | [26210](https://github.com/airbytehq/airbyte/pull/26210) | Bugfix for https://github.com/airbytehq/airbyte/pull/26115 | -| 0.3.7 | 2023-05-16 | [26131](https://github.com/airbytehq/airbyte/pull/26131) | Re-release source-file to be in sync with source-file-secure | -| 0.3.6 | 2023-05-16 | [26115](https://github.com/airbytehq/airbyte/pull/26115) | Add retry on SSHException('Error reading SSH protocol banner') | -| 0.3.5 | 2023-05-16 | [26117](https://github.com/airbytehq/airbyte/pull/26117) | Check if reader options is a valid JSON object | -| 0.3.4 | 2023-05-10 | [25965](https://github.com/airbytehq/airbyte/pull/25965) | fix Pandas date-time parsing to airbyte type | -| 0.3.3 | 2023-05-04 | [25819](https://github.com/airbytehq/airbyte/pull/25819) | GCP service_account_json is a secret | -| 0.3.2 | 2023-05-01 | [25641](https://github.com/airbytehq/airbyte/pull/25641) | Handle network errors | -| 0.3.1 | 2023-04-27 | [25575](https://github.com/airbytehq/airbyte/pull/25575) | Fix OOM; read Excel files in chunks using `openpyxl` | -| 0.3.0 | 2023-04-24 | [25445](https://github.com/airbytehq/airbyte/pull/25445) | Add datatime format parsing support for csv files | -| 0.2.38 | 2023-04-12 | [23759](https://github.com/airbytehq/airbyte/pull/23759) | Fix column data types for numerical values | -| 0.2.37 | 2023-04-06 | [24525](https://github.com/airbytehq/airbyte/pull/24525) | Fix examples in spec | -| 0.2.36 | 2023-03-27 | [24588](https://github.com/airbytehq/airbyte/pull/24588) | Remove traceback from user messages. | -| 0.2.35 | 2023-03-03 | [24278](https://github.com/airbytehq/airbyte/pull/24278) | Read only file header when checking connectivity; read only a single chunk when discovering the schema. | -| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. | -| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug | -| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s | -| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 | -| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command | -| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. | -| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Add retry logic for `Connection reset error - 104` | -| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format | -| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link | -| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. | -| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` | -| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. | -| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays | -| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files | -| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery | -| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' | -| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover | -| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file | -| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | Add support for encoding reader option | -| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 | -| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files | -| 0.2.11 | 2022-07-12 | [9974](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format | -| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 | -| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | -| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. | -| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format | -| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type | -| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | -| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option | -| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption | -| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | -| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | -| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format | -| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats | -| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices | -| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector | -| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values | -| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------| +| 0.3.12 | 2023-09-19 | [30579](https://github.com/airbytehq/airbyte/pull/30579) | Add ParserError handling for `discovery` | +| 0.3.11 | 2023-06-08 | [27157](https://github.com/airbytehq/airbyte/pull/27157) | Force smart open log level to ERROR | +| 0.3.10 | 2023-06-07 | [27107](https://github.com/airbytehq/airbyte/pull/27107) | Make source-file testable in our new airbyte-ci pipelines | +| 0.3.9 | 2023-05-18 | [26275](https://github.com/airbytehq/airbyte/pull/26275) | Add ParserError handling | +| 0.3.8 | 2023-05-17 | [26210](https://github.com/airbytehq/airbyte/pull/26210) | Bugfix for https://github.com/airbytehq/airbyte/pull/26115 | +| 0.3.7 | 2023-05-16 | [26131](https://github.com/airbytehq/airbyte/pull/26131) | Re-release source-file to be in sync with source-file-secure | +| 0.3.6 | 2023-05-16 | [26115](https://github.com/airbytehq/airbyte/pull/26115) | Add retry on SSHException('Error reading SSH protocol banner') | +| 0.3.5 | 2023-05-16 | [26117](https://github.com/airbytehq/airbyte/pull/26117) | Check if reader options is a valid JSON object | +| 0.3.4 | 2023-05-10 | [25965](https://github.com/airbytehq/airbyte/pull/25965) | fix Pandas date-time parsing to airbyte type | +| 0.3.3 | 2023-05-04 | [25819](https://github.com/airbytehq/airbyte/pull/25819) | GCP service_account_json is a secret | +| 0.3.2 | 2023-05-01 | [25641](https://github.com/airbytehq/airbyte/pull/25641) | Handle network errors | +| 0.3.1 | 2023-04-27 | [25575](https://github.com/airbytehq/airbyte/pull/25575) | Fix OOM; read Excel files in chunks using `openpyxl` | +| 0.3.0 | 2023-04-24 | [25445](https://github.com/airbytehq/airbyte/pull/25445) | Add datatime format parsing support for csv files | +| 0.2.38 | 2023-04-12 | [23759](https://github.com/airbytehq/airbyte/pull/23759) | Fix column data types for numerical values | +| 0.2.37 | 2023-04-06 | [24525](https://github.com/airbytehq/airbyte/pull/24525) | Fix examples in spec | +| 0.2.36 | 2023-03-27 | [24588](https://github.com/airbytehq/airbyte/pull/24588) | Remove traceback from user messages. | +| 0.2.35 | 2023-03-03 | [24278](https://github.com/airbytehq/airbyte/pull/24278) | Read only file header when checking connectivity; read only a single chunk when discovering the schema. | +| 0.2.34 | 2023-03-03 | [23723](https://github.com/airbytehq/airbyte/pull/23723) | Update description in spec, make user-friendly error messages and docs. | +| 0.2.33 | 2023-01-04 | [21012](https://github.com/airbytehq/airbyte/pull/21012) | Fix special characters bug | +| 0.2.32 | 2022-12-21 | [20740](https://github.com/airbytehq/airbyte/pull/20740) | Source File: increase SSH timeout to 60s | +| 0.2.31 | 2022-11-17 | [19567](https://github.com/airbytehq/airbyte/pull/19567) | Source File: bump 0.2.31 | +| 0.2.30 | 2022-11-10 | [19222](https://github.com/airbytehq/airbyte/pull/19222) | Use AirbyteConnectionStatus for "check" command | +| 0.2.29 | 2022-11-08 | [18587](https://github.com/airbytehq/airbyte/pull/18587) | Fix pandas read_csv header none issue. | +| 0.2.28 | 2022-10-27 | [18428](https://github.com/airbytehq/airbyte/pull/18428) | Add retry logic for `Connection reset error - 104` | +| 0.2.27 | 2022-10-26 | [18481](https://github.com/airbytehq/airbyte/pull/18481) | Fix check for wrong format | +| 0.2.26 | 2022-10-18 | [18116](https://github.com/airbytehq/airbyte/pull/18116) | Transform Dropbox shared link | +| 0.2.25 | 2022-10-14 | [17994](https://github.com/airbytehq/airbyte/pull/17994) | Handle `UnicodeDecodeError` during discover step. | +| 0.2.24 | 2022-10-03 | [17504](https://github.com/airbytehq/airbyte/pull/17504) | Validate data for `HTTPS` while `check_connection` | +| 0.2.23 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. | +| 0.2.22 | 2022-09-15 | [16772](https://github.com/airbytehq/airbyte/pull/16772) | Fix schema generation for JSON files containing arrays | +| 0.2.21 | 2022-08-26 | [15568](https://github.com/airbytehq/airbyte/pull/15568) | Specify `pyxlsb` library for Excel Binary Workbook files | +| 0.2.20 | 2022-08-23 | [15870](https://github.com/airbytehq/airbyte/pull/15870) | Fix CSV schema discovery | +| 0.2.19 | 2022-08-19 | [15768](https://github.com/airbytehq/airbyte/pull/15768) | Convert 'nan' to 'null' | +| 0.2.18 | 2022-08-16 | [15698](https://github.com/airbytehq/airbyte/pull/15698) | Cache binary stream to file for discover | +| 0.2.17 | 2022-08-11 | [15501](https://github.com/airbytehq/airbyte/pull/15501) | Cache binary stream to file | +| 0.2.16 | 2022-08-10 | [15293](https://github.com/airbytehq/airbyte/pull/15293) | Add support for encoding reader option | +| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 | +| 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files | +| 0.2.11 | 2022-07-12 | [9974](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format | +| 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 | +| 0.2.8 | 2021-12-06 | [8524](https://github.com/airbytehq/airbyte/pull/8524) | Update connector fields title/description | +| 0.2.7 | 2021-10-28 | [7387](https://github.com/airbytehq/airbyte/pull/7387) | Migrate source to CDK structure, add SAT testing. | +| 0.2.6 | 2021-08-26 | [5613](https://github.com/airbytehq/airbyte/pull/5613) | Add support to xlsb format | +| 0.2.5 | 2021-07-26 | [4953](https://github.com/airbytehq/airbyte/pull/4953) | Allow non-default port for SFTP type | +| 0.2.4 | 2021-06-09 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | +| 0.2.3 | 2021-06-01 | [3771](https://github.com/airbytehq/airbyte/pull/3771) | Add Azure Storage Blob Files option | +| 0.2.2 | 2021-04-16 | [2883](https://github.com/airbytehq/airbyte/pull/2883) | Fix CSV discovery memory consumption | +| 0.2.1 | 2021-04-03 | [2726](https://github.com/airbytehq/airbyte/pull/2726) | Fix base connector versioning | +| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | +| 0.1.10 | 2021-02-18 | [2118](https://github.com/airbytehq/airbyte/pull/2118) | Support JSONL format | +| 0.1.9 | 2021-02-02 | [1768](https://github.com/airbytehq/airbyte/pull/1768) | Add test cases for all formats | +| 0.1.8 | 2021-01-27 | [1738](https://github.com/airbytehq/airbyte/pull/1738) | Adopt connector best practices | +| 0.1.7 | 2020-12-16 | [1331](https://github.com/airbytehq/airbyte/pull/1331) | Refactor Python base connector | +| 0.1.6 | 2020-12-08 | [1249](https://github.com/airbytehq/airbyte/pull/1249) | Handle NaN values | +| 0.1.5 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |