diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9b727cf9c..2d7ddad1a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -13,10 +13,10 @@ updates: interval: daily labels: - chore - open-pull-requests-limit: 8 # default is 5 + open-pull-requests-limit: 8 # default is 5 - package-ecosystem: github-actions - open-pull-requests-limit: 5 # default is 5 + open-pull-requests-limit: 5 # default is 5 directory: "/" commit-message: prefix: "ci(deps): " @@ -29,5 +29,5 @@ updates: minor-and-patch: applies-to: version-updates update-types: - - patch - - minor + - patch + - minor diff --git a/.github/workflows/connector-tests.yml b/.github/workflows/connector-tests.yml index c441fc01e..0f7033f1f 100644 --- a/.github/workflows/connector-tests.yml +++ b/.github/workflows/connector-tests.yml @@ -72,12 +72,14 @@ jobs: cdk_extra: n/a - connector: source-shopify cdk_extra: n/a - - connector: source-chargebee - cdk_extra: n/a - - connector: source-s3 - cdk_extra: file-based - - connector: destination-pinecone - cdk_extra: vector-db-based + # Chargebee is being flaky: + # - connector: source-chargebee + # cdk_extra: n/a + # These two are behind in CDK updates and can't be used as tests until they are updated: + # - connector: source-s3 + # cdk_extra: file-based + # - connector: destination-pinecone + # cdk_extra: vector-db-based - connector: destination-motherduck cdk_extra: sql # ZenDesk currently failing (as of 2024-12-02) @@ -91,7 +93,7 @@ jobs: # - connector: source-pokeapi # cdk_extra: n/a - name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})" + name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs['src'] == 'false' || needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})" steps: - name: Abort if extra not changed (${{matrix.cdk_extra}}) id: no_changes diff --git a/.github/workflows/publish_sdm_connector.yml b/.github/workflows/publish_sdm_connector.yml index 3fde1d6b3..3dcb86dea 100644 --- a/.github/workflows/publish_sdm_connector.yml +++ b/.github/workflows/publish_sdm_connector.yml @@ -8,12 +8,11 @@ on: workflow_dispatch: inputs: version: - description: - The version to publish, ie 1.0.0 or 1.0.0-dev1. - If omitted, and if run from a release branch, the version will be - inferred from the git tag. - If omitted, and if run from a non-release branch, then only a SHA-based - Docker tag will be created. + description: The version to publish, ie 1.0.0 or 1.0.0-dev1. + If omitted, and if run from a release branch, the version will be + inferred from the git tag. + If omitted, and if run from a non-release branch, then only a SHA-based + Docker tag will be created. required: false dry_run: description: If true, the workflow will not push to DockerHub. @@ -25,7 +24,6 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Detect Release Tag Version if: startsWith(github.ref, 'refs/tags/v') run: | @@ -168,7 +166,6 @@ jobs: tags: | airbyte/source-declarative-manifest:${{ env.VERSION }} - - name: Build and push ('latest' tag) # Only run if version is set and IS_PRERELEASE is false if: env.VERSION != '' && env.IS_PRERELEASE == 'false' && github.event.inputs.dry_run == 'false' diff --git a/.github/workflows/pytest_matrix.yml b/.github/workflows/pytest_matrix.yml index 740bc06c4..fce75a27b 100644 --- a/.github/workflows/pytest_matrix.yml +++ b/.github/workflows/pytest_matrix.yml @@ -12,10 +12,10 @@ on: branches: - main paths: - - 'airbyte_cdk/**' - - 'unit_tests/**' - - 'poetry.lock' - - 'pyproject.toml' + - "airbyte_cdk/**" + - "unit_tests/**" + - "poetry.lock" + - "pyproject.toml" pull_request: jobs: diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml index f437f3b7f..6dbc6b1c4 100644 --- a/.github/workflows/python_lint.yml +++ b/.github/workflows/python_lint.yml @@ -72,14 +72,7 @@ jobs: - name: Install dependencies run: poetry install --all-extras - # Job-specifc step(s): + # Job-specific step(s): - # For now, we run mypy only on modified files - - name: Get changed Python files - id: changed-py-files - uses: tj-actions/changed-files@v43 - with: - files: "airbyte_cdk/**/*.py" - - name: Run mypy on changed files - if: steps.changed-py-files.outputs.any_changed == 'true' - run: poetry run mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --config-file mypy.ini --install-types --non-interactive + - name: Run mypy + run: poetry run mypy --config-file mypy.ini airbyte_cdk diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index ba0517850..232ac302f 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -25,7 +25,7 @@ from pathlib import Path from typing import Any, cast -from orjson import orjson +import orjson from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.models import ( @@ -72,7 +72,7 @@ def __init__( super().__init__( catalog=catalog, config=config, - state=state, + state=state, # type: ignore [arg-type] path_to_yaml="manifest.yaml", ) @@ -152,7 +152,9 @@ def handle_remote_manifest_command(args: list[str]) -> None: ) -def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource: +def create_declarative_source( + args: list[str], +) -> ConcurrentDeclarativeSource: # type: ignore [type-arg] """Creates the source with the injected config. This essentially does what other low-code sources do at build time, but at runtime, @@ -160,10 +162,14 @@ def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource: connector builder. """ try: + config: Mapping[str, Any] | None + catalog: ConfiguredAirbyteCatalog | None + state: list[AirbyteStateMessage] config, catalog, state = _parse_inputs_into_config_catalog_state(args) - if "__injected_declarative_manifest" not in config: + if config is None or "__injected_declarative_manifest" not in config: raise ValueError( - f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" + "Invalid config: `__injected_declarative_manifest` should be provided at the root " + f"of the config but config only has keys: {list(config.keys() if config else [])}" ) return ConcurrentDeclarativeSource( config=config, diff --git a/airbyte_cdk/config_observation.py b/airbyte_cdk/config_observation.py index 9a58c0391..ae85e8277 100644 --- a/airbyte_cdk/config_observation.py +++ b/airbyte_cdk/config_observation.py @@ -10,7 +10,7 @@ from copy import copy from typing import Any, List, MutableMapping -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteControlConnectorConfigMessage, diff --git a/airbyte_cdk/connector_builder/main.py b/airbyte_cdk/connector_builder/main.py index 5d3dfb68e..e122cee8c 100644 --- a/airbyte_cdk/connector_builder/main.py +++ b/airbyte_cdk/connector_builder/main.py @@ -6,7 +6,7 @@ import sys from typing import Any, List, Mapping, Optional, Tuple -from orjson import orjson +import orjson from airbyte_cdk.connector import BaseConnector from airbyte_cdk.connector_builder.connector_builder_handler import ( diff --git a/airbyte_cdk/connector_builder/message_grouper.py b/airbyte_cdk/connector_builder/message_grouper.py index aa3a42931..ce43afab8 100644 --- a/airbyte_cdk/connector_builder/message_grouper.py +++ b/airbyte_cdk/connector_builder/message_grouper.py @@ -71,7 +71,7 @@ def _cursor_field_to_nested_and_composite_field( is_nested_key = isinstance(field[0], str) if is_nested_key: - return [field] # type: ignore # the type of field is expected to be List[str] here + return [field] raise ValueError(f"Unknown type for cursor field `{field}") @@ -232,9 +232,9 @@ def _get_message_groups( current_slice_descriptor = self._parse_slice_description(message.log.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message current_slice_pages = [] at_least_one_page_in_group = False - elif message.type == MessageType.LOG and message.log.message.startswith( + elif message.type == MessageType.LOG and message.log.message.startswith( # type: ignore[union-attr] # None doesn't have 'message' SliceLogger.SLICE_LOG_PREFIX - ): # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message + ): # parsing the first slice current_slice_descriptor = self._parse_slice_description(message.log.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message elif message.type == MessageType.LOG: @@ -274,14 +274,14 @@ def _get_message_groups( if message.trace.type == TraceType.ERROR: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has trace.type yield message.trace elif message.type == MessageType.RECORD: - current_page_records.append(message.record.data) # type: ignore[union-attr] # AirbyteMessage with MessageType.RECORD has record.data + current_page_records.append(message.record.data) # type: ignore[arg-type, union-attr] # AirbyteMessage with MessageType.RECORD has record.data records_count += 1 schema_inferrer.accumulate(message.record) datetime_format_inferrer.accumulate(message.record) elif ( message.type == MessageType.CONTROL - and message.control.type == OrchestratorType.CONNECTOR_CONFIG - ): # type: ignore[union-attr] # AirbyteMessage with MessageType.CONTROL has control.type + and message.control.type == OrchestratorType.CONNECTOR_CONFIG # type: ignore[union-attr] # None doesn't have 'type' + ): yield message.control elif message.type == MessageType.STATE: latest_state_message = message.state # type: ignore[assignment] @@ -310,8 +310,8 @@ def _need_to_close_page( and message.type == MessageType.LOG and ( MessageGrouper._is_page_http_request(json_message) - or message.log.message.startswith("slice:") - ) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message + or message.log.message.startswith("slice:") # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message + ) ) @staticmethod @@ -355,8 +355,8 @@ def _close_page( StreamReadPages( request=current_page_request, response=current_page_response, - records=deepcopy(current_page_records), - ) # type: ignore + records=deepcopy(current_page_records), # type: ignore [arg-type] + ) ) current_page_records.clear() diff --git a/airbyte_cdk/destinations/destination.py b/airbyte_cdk/destinations/destination.py index f1d799945..547f96684 100644 --- a/airbyte_cdk/destinations/destination.py +++ b/airbyte_cdk/destinations/destination.py @@ -9,7 +9,7 @@ from abc import ABC, abstractmethod from typing import Any, Iterable, List, Mapping -from orjson import orjson +import orjson from airbyte_cdk.connector import Connector from airbyte_cdk.exception_handler import init_uncaught_exception_handler diff --git a/airbyte_cdk/destinations/vector_db_based/embedder.py b/airbyte_cdk/destinations/vector_db_based/embedder.py index a7610aea4..6889c8e16 100644 --- a/airbyte_cdk/destinations/vector_db_based/embedder.py +++ b/airbyte_cdk/destinations/vector_db_based/embedder.py @@ -107,7 +107,7 @@ def embedding_dimensions(self) -> int: class OpenAIEmbedder(BaseOpenAIEmbedder): def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int): super().__init__( - OpenAIEmbeddings( + OpenAIEmbeddings( # type: ignore [call-arg] openai_api_key=config.openai_key, max_retries=15, disallowed_special=() ), chunk_size, @@ -118,7 +118,7 @@ class AzureOpenAIEmbedder(BaseOpenAIEmbedder): def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int): # Azure OpenAI API has — as of 20230927 — a limit of 16 documents per request super().__init__( - OpenAIEmbeddings( + OpenAIEmbeddings( # type: ignore [call-arg] openai_api_key=config.openai_key, chunk_size=16, max_retries=15, diff --git a/airbyte_cdk/destinations/vector_db_based/writer.py b/airbyte_cdk/destinations/vector_db_based/writer.py index 268e49ef7..45c7c7326 100644 --- a/airbyte_cdk/destinations/vector_db_based/writer.py +++ b/airbyte_cdk/destinations/vector_db_based/writer.py @@ -83,11 +83,19 @@ def write( yield message elif message.type == Type.RECORD: record_chunks, record_id_to_delete = self.processor.process(message.record) - self.chunks[(message.record.namespace, message.record.stream)].extend(record_chunks) - if record_id_to_delete is not None: - self.ids_to_delete[(message.record.namespace, message.record.stream)].append( - record_id_to_delete + self.chunks[ + ( # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]" + message.record.namespace, # type: ignore [union-attr] # record not None + message.record.stream, # type: ignore [union-attr] # record not None ) + ].extend(record_chunks) + if record_id_to_delete is not None: + self.ids_to_delete[ + ( # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]" + message.record.namespace, # type: ignore [union-attr] # record not None + message.record.stream, # type: ignore [union-attr] # record not None + ) + ].append(record_id_to_delete) self.number_of_chunks += len(record_chunks) if self.number_of_chunks >= self.batch_size: self._process_batch() diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 1b8ba6396..b37c814fa 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -22,7 +22,7 @@ from airbyte_cdk.connector import TConfig from airbyte_cdk.exception_handler import init_uncaught_exception_handler from airbyte_cdk.logger import init_logger -from airbyte_cdk.models import ( # type: ignore [attr-defined] +from airbyte_cdk.models import ( AirbyteConnectionStatus, AirbyteMessage, AirbyteMessageSerializer, @@ -255,9 +255,10 @@ def handle_record_counts( stream_message_count[ HashableStreamDescriptor( - name=message.record.stream, namespace=message.record.namespace + name=message.record.stream, # type: ignore[union-attr] # record has `stream` + namespace=message.record.namespace, # type: ignore[union-attr] # record has `namespace` ) - ] += 1.0 # type: ignore[union-attr] # record has `stream` and `namespace` + ] += 1.0 case Type.STATE: if message.state is None: raise ValueError("State message must have a state attribute") @@ -266,9 +267,9 @@ def handle_record_counts( # Set record count from the counter onto the state message message.state.sourceStats = message.state.sourceStats or AirbyteStateStats() # type: ignore[union-attr] # state has `sourceStats` - message.state.sourceStats.recordCount = stream_message_count.get( + message.state.sourceStats.recordCount = stream_message_count.get( # type: ignore[union-attr] # state has `sourceStats` stream_descriptor, 0.0 - ) # type: ignore[union-attr] # state has `sourceStats` + ) # Reset the counter stream_message_count[stream_descriptor] = 0.0 @@ -290,7 +291,7 @@ def set_up_secret_filter(config: TConfig, connection_specification: Mapping[str, @staticmethod def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str: - return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode() # type: ignore[no-any-return] # orjson.dumps(message).decode() always returns string + return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode() @classmethod def extract_state(cls, args: List[str]) -> Optional[Any]: diff --git a/airbyte_cdk/logger.py b/airbyte_cdk/logger.py index 8b7f288b3..78061b605 100644 --- a/airbyte_cdk/logger.py +++ b/airbyte_cdk/logger.py @@ -7,7 +7,7 @@ import logging.config from typing import Any, Callable, Mapping, Optional, Tuple -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteLogMessage, @@ -78,7 +78,7 @@ def format(self, record: logging.LogRecord) -> str: log_message = AirbyteMessage( type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message) ) - return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode() # type: ignore[no-any-return] # orjson.dumps(message).decode() always returns string + return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode() @staticmethod def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, Any]: diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index 34ba816bc..ab9ee48b8 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -200,7 +200,7 @@ def read( if len(stream_name_to_exception) > 0: error_message = generate_failed_streams_error_message( {key: [value] for key, value in stream_name_to_exception.items()} - ) # type: ignore # for some reason, mypy can't figure out the types for key and value + ) logger.info(error_message) # We still raise at least one exception when a stream raises an exception because the platform currently relies # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error diff --git a/airbyte_cdk/sources/config.py b/airbyte_cdk/sources/config.py index 8679ebbb7..ea91b17f3 100644 --- a/airbyte_cdk/sources/config.py +++ b/airbyte_cdk/sources/config.py @@ -24,4 +24,4 @@ def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]: rename_key(schema, old_key="anyOf", new_key="oneOf") # UI supports only oneOf expand_refs(schema) schema.pop("description", None) # description added from the docstring - return schema # type: ignore[no-any-return] + return schema diff --git a/airbyte_cdk/sources/connector_state_manager.py b/airbyte_cdk/sources/connector_state_manager.py index 56b581279..914374a55 100644 --- a/airbyte_cdk/sources/connector_state_manager.py +++ b/airbyte_cdk/sources/connector_state_manager.py @@ -4,7 +4,7 @@ import copy from dataclasses import dataclass -from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union +from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union, cast from airbyte_cdk.models import ( AirbyteMessage, @@ -15,6 +15,7 @@ StreamDescriptor, ) from airbyte_cdk.models import Type as MessageType +from airbyte_cdk.models.airbyte_protocol import AirbyteGlobalState, AirbyteStateBlob @dataclass(frozen=True) @@ -118,8 +119,12 @@ def _extract_from_state_message( is_global = cls._is_global_state(state) if is_global: - global_state = state[0].global_ # type: ignore # We verified state is a list in _is_global_state - shared_state = copy.deepcopy(global_state.shared_state, {}) # type: ignore[union-attr] # global_state has shared_state + # We already validate that this is a global state message, not None: + global_state = cast(AirbyteGlobalState, state[0].global_) + # global_state has shared_state, also not None: + shared_state: AirbyteStateBlob = cast( + AirbyteStateBlob, copy.deepcopy(global_state.shared_state, {}) + ) streams = { HashableStreamDescriptor( name=per_stream_state.stream_descriptor.name, @@ -131,7 +136,7 @@ def _extract_from_state_message( else: streams = { HashableStreamDescriptor( - name=per_stream_state.stream.stream_descriptor.name, + name=per_stream_state.stream.stream_descriptor.name, # type: ignore[union-attr] # stream has stream_descriptor namespace=per_stream_state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # stream has stream_descriptor ): per_stream_state.stream.stream_state # type: ignore[union-attr] # stream has stream_state for per_stream_state in state diff --git a/airbyte_cdk/sources/declarative/auth/oauth.py b/airbyte_cdk/sources/declarative/auth/oauth.py index 18bcc9fce..8ec671f3e 100644 --- a/airbyte_cdk/sources/declarative/auth/oauth.py +++ b/airbyte_cdk/sources/declarative/auth/oauth.py @@ -135,7 +135,7 @@ def get_grant_type(self) -> str: return self.grant_type.eval(self.config) # type: ignore # eval returns a string in this context def get_refresh_request_body(self) -> Mapping[str, Any]: - return self._refresh_request_body.eval(self.config) # type: ignore # eval should return a Mapping in this context + return self._refresh_request_body.eval(self.config) def get_token_expiry_date(self) -> pendulum.DateTime: return self._token_expiry_date # type: ignore # _token_expiry_date is a pendulum.DateTime. It is never None despite what mypy thinks diff --git a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py index 9f8a55193..bc276cb99 100644 --- a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py +++ b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py @@ -28,7 +28,12 @@ def __new__( # type: ignore[misc] **kwargs: Any, ) -> DeclarativeAuthenticator: try: - selected_key = str(dpath.get(config, authenticator_selection_path)) + selected_key = str( + dpath.get( + config, # type: ignore [arg-type] # Dpath wants mutable mapping but doesn't need it. + authenticator_selection_path, + ) + ) except KeyError as err: raise ValueError( "The path from `authenticator_selection_path` is not found in the config." diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py index 85bce965d..7ee4d287e 100644 --- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py +++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py @@ -128,7 +128,7 @@ def __init__( initial_number_of_partitions_to_generate=initial_number_of_partitions_to_generate, logger=self.logger, slice_logger=self._slice_logger, - message_repository=self.message_repository, # type: ignore # message_repository is always instantiated with a value by factory + message_repository=self.message_repository, ) def read( diff --git a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index 1edf92432..eb407db44 100644 --- a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -41,12 +41,12 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.datetime = InterpolatedString.create(self.datetime, parameters=parameters or {}) self._parser = DatetimeParser() self.min_datetime = ( - InterpolatedString.create(self.min_datetime, parameters=parameters) + InterpolatedString.create(self.min_datetime, parameters=parameters) # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str" if self.min_datetime else None ) # type: ignore self.max_datetime = ( - InterpolatedString.create(self.max_datetime, parameters=parameters) + InterpolatedString.create(self.max_datetime, parameters=parameters) # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str" if self.max_datetime else None ) # type: ignore @@ -66,7 +66,13 @@ def get_datetime( datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z" time = self._parser.parse( - str(self.datetime.eval(config, **additional_parameters)), datetime_format + str( + self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval" + config, + **additional_parameters, + ) + ), + datetime_format, ) # type: ignore # datetime is always cast to an interpolated string if self.min_datetime: @@ -105,7 +111,7 @@ def create( if isinstance(interpolated_string_or_min_max_datetime, InterpolatedString) or isinstance( interpolated_string_or_min_max_datetime, str ): - return MinMaxDatetime( + return MinMaxDatetime( # type: ignore [call-arg] datetime=interpolated_string_or_min_max_datetime, parameters=parameters ) else: diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 2bf13d3df..0dc0032dd 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -2057,7 +2057,7 @@ definitions: The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc. The placeholders are replaced during the processing to provide neccessary values. examples: - - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}} + - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}} access_token_headers: title: (Optional) DeclarativeOAuth Access Token Headers type: object @@ -2065,9 +2065,10 @@ definitions: description: |- The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step. examples: - - access_token_headers: { - "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}" - } + - access_token_headers: + { + "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}", + } access_token_params: title: (Optional) DeclarativeOAuth Access Token Query Params (Json Encoded) type: object @@ -2076,18 +2077,19 @@ definitions: The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step. When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request. examples: - - access_token_params: { - "{auth_code_key}": "{{auth_code_key}}", - "{client_id_key}": "{{client_id_key}}", - "{client_secret_key}": "{{client_secret_key}}" - } + - access_token_params: + { + "{auth_code_key}": "{{auth_code_key}}", + "{client_id_key}": "{{client_id_key}}", + "{client_secret_key}": "{{client_secret_key}}", + } extract_output: title: DeclarativeOAuth Extract Output type: array items: type: string description: |- - The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. + The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. examples: - extract_output: ["access_token", "refresh_token", "other_field"] state: @@ -2099,17 +2101,14 @@ definitions: - max description: |- The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed, - including length and complexity. + including length and complexity. properties: min: type: integer max: type: integer examples: - - state: { - "min": 7, - "max": 128, - } + - state: { "min": 7, "max": 128 } client_id_key: title: (Optional) DeclarativeOAuth Client ID Key Override type: string @@ -2135,14 +2134,14 @@ definitions: title: (Optional) DeclarativeOAuth State Key Override type: string description: |- - The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. + The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. examples: - state_key: "my_custom_state_key_key_name" auth_code_key: title: (Optional) DeclarativeOAuth Auth Code Key Override type: string description: |- - The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. + The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. examples: - auth_code_key: "my_custom_auth_code_key_name" redirect_uri_key: diff --git a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py index 13281026d..cf0bc56eb 100644 --- a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py @@ -14,5 +14,8 @@ class NoopDecoder(Decoder): def is_stream_response(self) -> bool: return False - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + def decode( # type: ignore[override] # Signature doesn't match base class + self, + response: requests.Response, + ) -> Generator[Mapping[str, Any], None, None]: yield from [{}] diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 03998c479..d6d329aec 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -133,8 +133,8 @@ def set_initial_state(self, stream_state: StreamState) -> None: :param stream_state: The state of the stream as returned by get_stream_state """ self._cursor = ( - stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None - ) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__ + stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None # type: ignore [union-attr] + ) def observe(self, stream_slice: StreamSlice, record: Record) -> None: """ @@ -158,8 +158,10 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: ) if ( self._is_within_daterange_boundaries( - record, stream_slice.get(start_field), stream_slice.get(end_field) - ) # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date + record, + stream_slice.get(start_field), # type: ignore [arg-type] + stream_slice.get(end_field), # type: ignore [arg-type] + ) and is_highest_observed_cursor_value ): self._highest_observed_cursor_field_value = record_cursor_value @@ -368,9 +370,9 @@ def _get_request_options( self._partition_field_start.eval(self.config) ) if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( + options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] self._partition_field_end.eval(self.config) - ) # type: ignore # field_name is always casted to an interpolated string + ) return options def should_be_synced(self, record: Record) -> bool: diff --git a/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte_cdk/sources/declarative/interpolation/jinja.py index ecbe9a349..ec5e861cd 100644 --- a/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -27,7 +27,7 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment): def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool: if attr in ["_partition"]: return True - return super().is_safe_attribute(obj, attr, value) # type: ignore # for some reason, mypy says 'Returning Any from function declared to return "bool"' + return super().is_safe_attribute(obj, attr, value) class JinjaInterpolation(Interpolation): @@ -132,7 +132,7 @@ def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]: return s @cache - def _find_undeclared_variables(self, s: Optional[str]) -> Template: + def _find_undeclared_variables(self, s: Optional[str]) -> set[str]: """ Find undeclared variables and cache them """ @@ -144,4 +144,4 @@ def _compile(self, s: Optional[str]) -> Template: """ We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader """ - return self._environment.from_string(s) + return self._environment.from_string(s) # type: ignore [arg-type] # Expected `str | Template` but passed `str | None` diff --git a/airbyte_cdk/sources/declarative/interpolation/macros.py b/airbyte_cdk/sources/declarative/interpolation/macros.py index ce448c127..e786f0116 100644 --- a/airbyte_cdk/sources/declarative/interpolation/macros.py +++ b/airbyte_cdk/sources/declarative/interpolation/macros.py @@ -116,7 +116,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]: Usage: `"{{ now_utc() - duration('P1D') }}"` """ - return parse_duration(datestring) # type: ignore # mypy thinks this returns Any for some reason + return parse_duration(datestring) def format_datetime( diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 1712cb67c..2da84ff68 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -396,7 +396,7 @@ def __init__( self._disable_retries = disable_retries self._disable_cache = disable_cache self._disable_resumable_full_refresh = disable_resumable_full_refresh - self._message_repository = message_repository or InMemoryMessageRepository( # type: ignore + self._message_repository = message_repository or InMemoryMessageRepository( self._evaluate_log_level(emit_connector_builder_messages) ) @@ -644,7 +644,7 @@ def create_legacy_to_per_partition_state_migration( declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. config, declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] - ) # type: ignore # The retriever type was already checked + ) def create_session_token_authenticator( self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any @@ -674,7 +674,7 @@ def create_session_token_authenticator( return ModelToComponentFactory.create_bearer_authenticator( BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value config, - token_provider=token_provider, # type: ignore # $parameters defaults to None + token_provider=token_provider, ) else: return ModelToComponentFactory.create_api_key_authenticator( @@ -821,7 +821,6 @@ def create_concurrent_cursor_from_datetime_based_cursor( input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, is_sequential_state=True, cursor_granularity=cursor_granularity, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice ) start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] @@ -894,7 +893,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( stream_name=stream_name, stream_namespace=stream_namespace, stream_state=stream_state, - message_repository=self._message_repository, # type: ignore # message_repository is always instantiated with a value by factory + message_repository=self._message_repository, connector_state_manager=state_manager, connector_state_converter=connector_state_converter, cursor_field=cursor_field, @@ -1705,7 +1704,7 @@ def create_oauth_authenticator( refresh_token=model.refresh_token, scopes=model.scopes, token_expiry_date=model.token_expiry_date, - token_expiry_date_format=model.token_expiry_date_format, # type: ignore + token_expiry_date_format=model.token_expiry_date_format, token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), token_refresh_endpoint=model.token_refresh_endpoint, config=config, diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index 28925ae6a..1c7bb6961 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -130,11 +130,11 @@ def _get_request_option( if value: params.update( { - parent_config.request_option.field_name.eval( + parent_config.request_option.field_name.eval( # type: ignore [union-attr] config=self.config ): value } - ) # type: ignore # field_name is always casted to an interpolated string + ) return params def stream_slices(self) -> Iterable[StreamSlice]: @@ -162,9 +162,9 @@ def stream_slices(self) -> Iterable[StreamSlice]: extra_fields = None if parent_stream_config.extra_fields: extra_fields = [ - [field_path_part.eval(self.config) for field_path_part in field_path] + [field_path_part.eval(self.config) for field_path_part in field_path] # type: ignore [union-attr] for field_path in parent_stream_config.extra_fields - ] # type: ignore # extra_fields is always casted to an interpolated string + ] # read_stateless() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does # not support either substreams or RFR, but something that needs to be considered once we do @@ -192,7 +192,10 @@ def stream_slices(self) -> Iterable[StreamSlice]: message=f"Parent stream returned records as invalid type {type(parent_record)}" ) try: - partition_value = dpath.get(parent_record, parent_field) + partition_value = dpath.get( + parent_record, # type: ignore [arg-type] + parent_field, + ) except KeyError: continue @@ -228,7 +231,10 @@ def _extract_extra_fields( if extra_fields: for extra_field_path in extra_fields: try: - extra_field_value = dpath.get(parent_record, extra_field_path) + extra_field_value = dpath.get( + parent_record, # type: ignore [arg-type] + extra_field_path, + ) self.logger.debug( f"Extracted extra_field_path: {extra_field_path} with value: {extra_field_value}" ) @@ -291,7 +297,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: if not parent_state and incremental_dependency: # Attempt to retrieve child state substream_state = list(stream_state.values()) - substream_state = substream_state[0] if substream_state else {} + substream_state = substream_state[0] if substream_state else {} # type: ignore [assignment] # Incorrect type for assignment parent_state = {} # Copy child state to parent streams with incremental dependencies diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py index 1340e8595..b70ceaaeb 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py @@ -141,7 +141,7 @@ def backoff_time( for backoff_strategy in self.backoff_strategies: backoff = backoff_strategy.backoff_time( response_or_exception=response_or_exception, attempt_count=attempt_count - ) # type: ignore # attempt_count maintained for compatibility with low code CDK + ) if backoff: return backoff return backoff diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py index 82dfb661b..a2fc80007 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py @@ -151,21 +151,23 @@ def _create_error_message(self, response: requests.Response) -> Optional[str]: :param response: The HTTP response which can be used during interpolation :return: The evaluated error message string to be emitted """ - return self.error_message.eval( + return self.error_message.eval( # type: ignore [no-any-return, union-attr] self.config, response=self._safe_response_json(response), headers=response.headers - ) # type: ignore # error_message is always cast to an interpolated string + ) def _response_matches_predicate(self, response: requests.Response) -> bool: return ( bool( - self.predicate.condition - and self.predicate.eval( - None, response=self._safe_response_json(response), headers=response.headers + self.predicate.condition # type: ignore [union-attr] + and self.predicate.eval( # type: ignore [union-attr] + None, # type: ignore [arg-type] + response=self._safe_response_json(response), + headers=response.headers, ) ) if self.predicate else False - ) # type: ignore # predicate is always cast to an interpolated string + ) def _response_contains_error_message(self, response: requests.Response) -> bool: if not self.error_message_contains: diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index c660f0327..e9476447a 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -194,7 +194,7 @@ def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping and self.pagination_strategy.get_page_size() and self.page_size_option.inject_into == option_type ): - options[self.page_size_option.field_name.eval(config=self.config)] = ( + options[self.page_size_option.field_name.eval(config=self.config)] = ( # type: ignore [union-attr] self.pagination_strategy.get_page_size() ) # type: ignore # field_name is always cast to an interpolated string return options diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index 5ce7c9a3d..05e06db71 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -85,7 +85,7 @@ def _get_request_options( self._partition_field_start.eval(self.config) ) if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( + options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] self._partition_field_end.eval(self.config) - ) # type: ignore # field_name is always casted to an interpolated string + ) return options diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py index e8e4ee643..c327b83da 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py @@ -5,7 +5,7 @@ from dataclasses import InitVar, dataclass, field from typing import Any, Mapping, MutableMapping, Optional, Union -from deprecated import deprecated +from typing_extensions import deprecated from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import ( diff --git a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index 5cea85bcb..3d9a3ead9 100644 --- a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -4,7 +4,7 @@ from dataclasses import InitVar, dataclass, field from typing import Any, Callable, Iterable, Mapping, Optional -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import FailureType from airbyte_cdk.sources.declarative.async_job.job_orchestrator import ( @@ -21,7 +21,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) @dataclass class AsyncRetriever(Retriever): config: Config diff --git a/airbyte_cdk/sources/declarative/spec/spec.py b/airbyte_cdk/sources/declarative/spec/spec.py index 05fa079bf..914e99e93 100644 --- a/airbyte_cdk/sources/declarative/spec/spec.py +++ b/airbyte_cdk/sources/declarative/spec/spec.py @@ -9,7 +9,7 @@ AdvancedAuth, ConnectorSpecification, ConnectorSpecificationSerializer, -) # type: ignore [attr-defined] +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow diff --git a/airbyte_cdk/sources/embedded/base_integration.py b/airbyte_cdk/sources/embedded/base_integration.py index c2e67408e..77917b0a1 100644 --- a/airbyte_cdk/sources/embedded/base_integration.py +++ b/airbyte_cdk/sources/embedded/base_integration.py @@ -52,8 +52,9 @@ def _load_data( for message in self.source.read(self.config, configured_catalog, state): if message.type == Type.RECORD: output = self._handle_record( - message.record, get_defined_id(stream, message.record.data) - ) # type: ignore[union-attr] # record has `data` + message.record, + get_defined_id(stream, message.record.data), # type: ignore[union-attr, arg-type] + ) if output: yield output elif message.type is Type.STATE and message.state: diff --git a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py index c0234ca17..12e1740b6 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py @@ -2,6 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from __future__ import annotations + import logging from abc import abstractmethod from typing import TYPE_CHECKING, Optional, Tuple @@ -22,8 +24,11 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy): @abstractmethod - def check_availability( - self, stream: Stream, logger: logging.Logger, _: Optional[Source] + def check_availability( # type: ignore[override] # Signature doesn't match base class + self, + stream: Stream, + logger: logging.Logger, + _: Optional[Source], ) -> Tuple[bool, Optional[str]]: """ Perform a connection check for the stream. @@ -34,7 +39,10 @@ def check_availability( @abstractmethod def check_availability_and_parsability( - self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source] + self, + stream: AbstractFileBasedStream, + logger: logging.Logger, + _: Optional[Source], ) -> Tuple[bool, Optional[str]]: """ Performs a connection check for the stream, as well as additional checks that @@ -46,7 +54,7 @@ def check_availability_and_parsability( class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy): - def __init__(self, stream: "AbstractFileBasedStream"): + def __init__(self, stream: AbstractFileBasedStream) -> None: self.stream = stream def check_availability(self, logger: logging.Logger) -> StreamAvailability: diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index cf985d9ee..c9d416a72 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -2,6 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from __future__ import annotations + import logging import traceback from typing import TYPE_CHECKING, Optional, Tuple @@ -25,12 +27,15 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy): - def __init__(self, stream_reader: AbstractFileBasedStreamReader): + def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None: self.stream_reader = stream_reader - def check_availability( - self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source] - ) -> Tuple[bool, Optional[str]]: # type: ignore[override] + def check_availability( # type: ignore[override] # Signature doesn't match base class + self, + stream: AbstractFileBasedStream, + logger: logging.Logger, + _: Optional[Source], + ) -> Tuple[bool, Optional[str]]: """ Perform a connection check for the stream (verify that we can list files from the stream). @@ -44,7 +49,10 @@ def check_availability( return True, None def check_availability_and_parsability( - self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source] + self, + stream: AbstractFileBasedStream, + logger: logging.Logger, + _: Optional[Source], ) -> Tuple[bool, Optional[str]]: """ Perform a connection check for the stream. @@ -82,7 +90,7 @@ def check_availability_and_parsability( return True, None - def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile: + def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile: """ Check that we can list files from the stream. @@ -102,7 +110,10 @@ def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile: return file def _check_parse_record( - self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger + self, + stream: AbstractFileBasedStream, + file: RemoteFile, + logger: logging.Logger, ) -> None: parser = stream.get_parser() diff --git a/airbyte_cdk/sources/file_based/file_types/avro_parser.py b/airbyte_cdk/sources/file_based/file_types/avro_parser.py index 61e3a2c82..e1aa2c4cb 100644 --- a/airbyte_cdk/sources/file_based/file_types/avro_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/avro_parser.py @@ -3,7 +3,7 @@ # import logging -from typing import Any, Dict, Iterable, Mapping, Optional, Tuple +from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, cast import fastavro @@ -64,18 +64,20 @@ async def infer_schema( raise ValueError(f"Expected ParquetFormat, got {avro_format}") with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: - avro_reader = fastavro.reader(fp) + avro_reader = fastavro.reader(fp) # type: ignore [arg-type] avro_schema = avro_reader.writer_schema - if not avro_schema["type"] == "record": - unsupported_type = avro_schema["type"] + if not avro_schema["type"] == "record": # type: ignore [index, call-overload] + unsupported_type = avro_schema["type"] # type: ignore [index, call-overload] raise ValueError( f"Only record based avro files are supported. Found {unsupported_type}" ) json_schema = { - field["name"]: AvroParser._convert_avro_type_to_json( - avro_format, field["name"], field["type"] + field["name"]: AvroParser._convert_avro_type_to_json( # type: ignore [index] + avro_format, + field["name"], # type: ignore [index] + field["type"], # type: ignore [index] ) - for field in avro_schema["fields"] + for field in avro_schema["fields"] # type: ignore [index, call-overload] } return json_schema @@ -180,18 +182,19 @@ def parse_records( line_no = 0 try: with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: - avro_reader = fastavro.reader(fp) + avro_reader = fastavro.reader(fp) # type: ignore [arg-type] schema = avro_reader.writer_schema schema_field_name_to_type = { - field["name"]: field["type"] for field in schema["fields"] + field["name"]: cast(dict[str, Any], field["type"]) # type: ignore [index] + for field in schema["fields"] # type: ignore [index, call-overload] # If schema is not dict, it is not subscriptable by strings } for record in avro_reader: line_no += 1 yield { record_field: self._to_output_value( avro_format, - schema_field_name_to_type[record_field], - record[record_field], + schema_field_name_to_type[record_field], # type: ignore [index] # Any not subscriptable + record[record_field], # type: ignore [index] # Any not subscriptable ) for record_field, record_value in schema_field_name_to_type.items() } diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index 1b7fcfed5..e3010690e 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -12,7 +12,7 @@ from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple from uuid import uuid4 -from orjson import orjson +import orjson from airbyte_cdk.models import FailureType from airbyte_cdk.sources.file_based.config.csv_format import ( @@ -117,7 +117,7 @@ def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str) """ # Note that this method assumes the dialect has already been registered if we're parsing the headers if isinstance(config_format.header_definition, CsvHeaderUserProvided): - return config_format.header_definition.column_names # type: ignore # should be CsvHeaderUserProvided given the type + return config_format.header_definition.column_names if isinstance(config_format.header_definition, CsvHeaderAutogenerated): self._skip_rows( @@ -229,7 +229,7 @@ def parse_records( if discovered_schema: property_types = { col: prop["type"] for col, prop in discovered_schema["properties"].items() - } # type: ignore # discovered_schema["properties"] is known to be a mapping + } deduped_property_types = CsvParser._pre_propcess_property_types(property_types) else: deduped_property_types = {} diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index a95df6033..5a0332171 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -7,10 +7,10 @@ from pathlib import Path from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union +import orjson import pandas as pd from numpy import datetime64, issubdtype from numpy import dtype as dtype_ -from orjson import orjson from pydantic.v1 import BaseModel from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( @@ -69,8 +69,11 @@ async def infer_schema( df = self.open_and_parse_file(fp) for column, df_type in df.dtypes.items(): # Choose the broadest data type if the column's data type differs in dataframes - prev_frame_column_type = fields.get(column) - fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type) + prev_frame_column_type = fields.get(column) # type: ignore [call-overload] + fields[column] = self.dtype_to_json_type( # type: ignore [index] + prev_frame_column_type, + df_type, + ) schema = { field: ( @@ -136,7 +139,10 @@ def file_read_mode(self) -> FileReadMode: return FileReadMode.READ_BINARY @staticmethod - def dtype_to_json_type(current_type: Optional[str], dtype: dtype_) -> str: + def dtype_to_json_type( + current_type: Optional[str], + dtype: dtype_, # type: ignore [type-arg] + ) -> str: """ Convert Pandas DataFrame types to Airbyte Types. @@ -187,4 +193,4 @@ def open_and_parse_file(fp: Union[IOBase, str, Path]) -> pd.DataFrame: Returns: pd.DataFrame: Parsed data from the Excel file. """ - return pd.ExcelFile(fp, engine="calamine").parse() + return pd.ExcelFile(fp, engine="calamine").parse() # type: ignore [arg-type, call-overload, no-any-return] diff --git a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py index 67132dd87..722ad329b 100644 --- a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py @@ -6,7 +6,7 @@ import logging from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union -from orjson import orjson +import orjson from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError diff --git a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py index 5423ffa9f..ef258b34d 100644 --- a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py @@ -6,7 +6,7 @@ from functools import cache, cached_property, lru_cache from typing import Any, Dict, Iterable, List, Mapping, Optional, Type -from deprecated import deprecated +from typing_extensions import deprecated from airbyte_cdk import AirbyteMessage from airbyte_cdk.models import SyncMode @@ -179,7 +179,7 @@ def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool: ) @cached_property - @deprecated(version="3.7.0") + @deprecated("Deprecated as of CDK version 3.7.0.") def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy: return self._availability_strategy diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py index ca4671eda..fb0efc82c 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py @@ -7,7 +7,7 @@ from functools import cache, lru_cache from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import ( AirbyteLogMessage, @@ -56,7 +56,10 @@ """ -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBasedStream): @classmethod def create_from_stream( @@ -143,7 +146,7 @@ def supports_incremental(self) -> bool: return self._legacy_stream.supports_incremental @property - @deprecated(version="3.7.0") + @deprecated("Deprecated as of CDK version 3.7.0.") def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy: return self._legacy_stream.availability_strategy diff --git a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py index 814bc1a1e..08ad8c3ae 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py @@ -21,7 +21,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor): CURSOR_FIELD = "_ab_source_file_last_modified" def __init__(self, stream_config: FileBasedStreamConfig, **_: Any): - super().__init__(stream_config) + super().__init__(stream_config) # type: ignore [safe-super] self._file_to_datetime_history: MutableMapping[str, str] = {} self._time_window_if_history_is_full = timedelta( days=stream_config.days_to_sync_if_history_is_full diff --git a/airbyte_cdk/sources/http_logger.py b/airbyte_cdk/sources/http_logger.py index 58d6aed30..33ccc68ac 100644 --- a/airbyte_cdk/sources/http_logger.py +++ b/airbyte_cdk/sources/http_logger.py @@ -14,7 +14,7 @@ def format_http_message( title: str, description: str, stream_name: Optional[str], - is_auxiliary: bool = None, + is_auxiliary: bool | None = None, ) -> LogMessage: request = response.request log_message = { @@ -42,10 +42,10 @@ def format_http_message( "url": {"full": request.url}, } if is_auxiliary is not None: - log_message["http"]["is_auxiliary"] = is_auxiliary + log_message["http"]["is_auxiliary"] = is_auxiliary # type: ignore [index] if stream_name: log_message["airbyte_cdk"] = {"stream": {"name": stream_name}} - return log_message + return log_message # type: ignore [return-value] # got "dict[str, object]", expected "dict[str, JsonType]" def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]: diff --git a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py index decf645b7..26e6f09d4 100644 --- a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py @@ -5,7 +5,7 @@ from abc import ABC, abstractmethod from typing import Any, Iterable, Mapping, Optional -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import AirbyteStream from airbyte_cdk.sources.source import ExperimentalClassWarning @@ -14,7 +14,10 @@ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class AbstractStream(ABC): """ AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK. diff --git a/airbyte_cdk/sources/streams/concurrent/adapters.py b/airbyte_cdk/sources/streams/concurrent/adapters.py index fe00cf677..f304bfb21 100644 --- a/airbyte_cdk/sources/streams/concurrent/adapters.py +++ b/airbyte_cdk/sources/streams/concurrent/adapters.py @@ -8,7 +8,7 @@ from functools import lru_cache from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import ( AirbyteLogMessage, @@ -50,7 +50,10 @@ """ -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class StreamFacade(AbstractStreamFacade[DefaultStream], Stream): """ The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream. @@ -297,7 +300,7 @@ def read(self) -> Iterable[Record]: yield Record( data=data_to_return, stream_name=self.stream_name(), - associated_slice=self._slice, + associated_slice=self._slice, # type: ignore [arg-type] ) else: self._message_repository.emit_message(record_data) diff --git a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py index c69a9f3e5..118a7d0bb 100644 --- a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py +++ b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py @@ -6,7 +6,7 @@ from abc import ABC, abstractmethod from typing import Optional -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.sources.source import ExperimentalClassWarning @@ -48,7 +48,10 @@ def message(self) -> Optional[str]: STREAM_AVAILABLE = StreamAvailable() -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class AbstractAvailabilityStrategy(ABC): """ AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK. @@ -68,7 +71,10 @@ def check_availability(self, logger: logging.Logger) -> StreamAvailability: """ -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy): """ An availability strategy that always indicates a stream is available. diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index 4f103b224..cbce82a94 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -473,7 +473,7 @@ def should_be_synced(self, record: Record) -> bool: :return: True if the record's cursor value falls within the sync boundaries """ try: - record_cursor_value: CursorValueType = self._extract_cursor_value(record) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__ + record_cursor_value: CursorValueType = self._extract_cursor_value(record) except ValueError: self._log_for_record_without_cursor_value() return True diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py index 714789af3..3f53a9234 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py @@ -141,7 +141,7 @@ def parse_timestamp(self, timestamp: int) -> datetime: raise ValueError( f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})" ) - return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types + return dt_object class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter): @@ -178,7 +178,7 @@ def parse_timestamp(self, timestamp: str) -> datetime: raise ValueError( f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})" ) - return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types + return dt_object class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter): diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 51c3682d6..a9aa8550a 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -10,7 +10,7 @@ from functools import cached_property, lru_cache from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union -from deprecated import deprecated +from typing_extensions import deprecated import airbyte_cdk.sources.utils.casing as casing from airbyte_cdk.models import ( @@ -92,8 +92,8 @@ def state(self, value: MutableMapping[str, Any]) -> None: @deprecated( - version="0.87.0", - reason="Deprecated in favor of the CheckpointMixin which offers similar functionality", + "Deprecated as of CDK version 0.87.0. " + "Deprecated in favor of the `CheckpointMixin` which offers similar functionality." ) class IncrementalMixin(CheckpointMixin, ABC): """Mixin to make stream incremental. @@ -115,12 +115,6 @@ class StreamClassification: has_multiple_slices: bool -# Moved to class declaration since get_updated_state is called on every record for incremental syncs, and thus the @deprecated decorator as well. -@deprecated( - version="0.1.49", - reason="Deprecated method get_updated_state, You should use explicit state property instead, see IncrementalMixin docs.", - action="ignore", -) class Stream(ABC): """ Base abstract class for an Airbyte Stream. Makes no assumption of the Stream's underlying transport protocol. @@ -222,7 +216,8 @@ def read( # type: ignore # ignoring typing for ConnectorStateManager because o # Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This # should be fixed on the stream implementation, but we should also protect against this in the CDK as well stream_state_tracker = self.get_updated_state( - stream_state_tracker, record_data + stream_state_tracker, + record_data, # type: ignore [arg-type] ) self._observe_state(checkpoint_reader, stream_state_tracker) record_counter += 1 @@ -282,7 +277,7 @@ def read_only_records(self, state: Optional[Mapping[str, Any]] = None) -> Iterab if state else {}, # read() expects MutableMapping instead of Mapping which is used more often state_manager=None, - internal_config=InternalConfig(), + internal_config=InternalConfig(), # type: ignore [call-arg] ) @abstractmethod @@ -322,7 +317,7 @@ def as_airbyte_stream(self) -> AirbyteStream: # If we can offer incremental we always should. RFR is always less reliable than incremental which uses a real cursor value if self.supports_incremental: stream.source_defined_cursor = self.source_defined_cursor - stream.supported_sync_modes.append(SyncMode.incremental) # type: ignore + stream.supported_sync_modes.append(SyncMode.incremental) stream.default_cursor_field = self._wrapped_cursor_field() keys = Stream._wrapped_primary_key(self.primary_key) @@ -436,10 +431,18 @@ def state_checkpoint_interval(self) -> Optional[int]: """ return None + # Commented-out to avoid any runtime penalty, since this is used in a hot per-record codepath. + # To be evaluated for re-introduction here: https://github.com/airbytehq/airbyte-python-cdk/issues/116 + # @deprecated( + # "Deprecated method `get_updated_state` as of CDK version 0.1.49. " + # "Please use explicit state property instead, see `IncrementalMixin` docs." + # ) def get_updated_state( self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] ) -> MutableMapping[str, Any]: - """Override to extract state from the latest record. Needed to implement incremental sync. + """DEPRECATED. Please use explicit state property instead, see `IncrementalMixin` docs. + + Override to extract state from the latest record. Needed to implement incremental sync. Inspects the latest record extracted from the data source and the current state object and return an updated state object. @@ -654,7 +657,7 @@ def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateMana # todo: This can be consolidated into one ConnectorStateManager.update_and_create_state_message() method, but I want # to reduce changes right now and this would span concurrent as well state_manager.update_state_for_stream(self.name, self.namespace, stream_state) - return state_manager.create_state_message(self.name, self.namespace) + return state_manager.create_state_message(self.name, self.namespace) # type: ignore [no-any-return] @property def configured_json_schema(self) -> Optional[Dict[str, Any]]: diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index f465671be..40eab27a3 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -9,8 +9,8 @@ from urllib.parse import urljoin import requests -from deprecated import deprecated from requests.auth import AuthBase +from typing_extensions import deprecated from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode from airbyte_cdk.models import Type as MessageType @@ -121,8 +121,8 @@ def http_method(self) -> str: @property @deprecated( - version="3.0.0", - reason="You should set error_handler explicitly in HttpStream.get_error_handler() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set error_handler explicitly in HttpStream.get_error_handler() instead." ) def raise_on_http_errors(self) -> bool: """ @@ -132,8 +132,8 @@ def raise_on_http_errors(self) -> bool: @property @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) def max_retries(self) -> Union[int, None]: """ @@ -143,8 +143,8 @@ def max_retries(self) -> Union[int, None]: @property @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) def max_time(self) -> Union[int, None]: """ @@ -154,8 +154,8 @@ def max_time(self) -> Union[int, None]: @property @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) def retry_factor(self) -> float: """ @@ -594,7 +594,7 @@ def stream_slices( # Skip non-records (eg AirbyteLogMessage) if isinstance(parent_record, AirbyteMessage): if parent_record.type == MessageType.RECORD: - parent_record = parent_record.record.data + parent_record = parent_record.record.data # type: ignore [assignment, union-attr] # Incorrect type for assignment else: continue elif isinstance(parent_record, Record): @@ -603,8 +603,8 @@ def stream_slices( @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0." + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) class HttpStreamAdapterBackoffStrategy(BackoffStrategy): def __init__(self, stream: HttpStream): @@ -619,8 +619,8 @@ def backoff_time( @deprecated( - version="3.0.0", - reason="You should set error_handler explicitly in HttpStream.get_error_handler() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set error_handler explicitly in HttpStream.get_error_handler() instead." ) class HttpStreamAdapterHttpStatusErrorHandler(HttpStatusErrorHandler): def __init__(self, stream: HttpStream, **kwargs): # type: ignore # noqa @@ -639,15 +639,15 @@ def interpret_response( return ErrorResolution( response_action=ResponseAction.RATE_LIMITED, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", ) return ErrorResolution( response_action=ResponseAction.RETRY, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", ) else: - if response_or_exception.ok: # type: ignore # noqa + if response_or_exception.ok: return ErrorResolution( response_action=ResponseAction.SUCCESS, failure_type=None, @@ -657,13 +657,13 @@ def interpret_response( return ErrorResolution( response_action=ResponseAction.FAIL, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Unexpected error. Failed.", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Unexpected error. Failed.", ) else: return ErrorResolution( response_action=ResponseAction.IGNORE, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Ignoring...", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Ignoring...", ) else: self._logger.error(f"Received unexpected response type: {type(response_or_exception)}") diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index 91e2a63d9..4f99bbeba 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -144,7 +144,7 @@ def _request_session(self) -> requests.Session: sqlite_path = "file::memory:?cache=shared" return CachedLimiterSession( sqlite_path, backend="sqlite", api_budget=self._api_budget, match_headers=True - ) # type: ignore # there are no typeshed stubs for requests_cache + ) else: return LimiterSession(api_budget=self._api_budget) @@ -324,7 +324,7 @@ def _send( formatter = log_formatter self._message_repository.log_message( Level.DEBUG, - lambda: formatter(response), # type: ignore # log_formatter is always cast to a callable + lambda: formatter(response), ) self._handle_error_resolution( diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py index db59600db..ffcc8e851 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py @@ -5,13 +5,14 @@ from abc import abstractmethod from typing import Any, Mapping +import requests from requests.auth import AuthBase class AbstractHeaderAuthenticator(AuthBase): """Abstract class for an header-based authenticators that add a header to outgoing HTTP requests.""" - def __call__(self, request): + def __call__(self, request: requests.PreparedRequest) -> Any: """Attach the HTTP headers required to authenticate on the HTTP request""" request.headers.update(self.get_auth_header()) return request diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index bdc5eddcd..8e5c71458 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -30,12 +30,12 @@ def __init__( client_id: str, client_secret: str, refresh_token: str, - scopes: List[str] = None, - token_expiry_date: pendulum.DateTime = None, - token_expiry_date_format: str = None, + scopes: List[str] | None = None, + token_expiry_date: pendulum.DateTime | None = None, + token_expiry_date_format: str | None = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", - refresh_request_body: Mapping[str, Any] = None, + refresh_request_body: Mapping[str, Any] | None = None, grant_type: str = "refresh_token", token_expiry_is_time_of_expiration: bool = False, refresh_token_error_status_codes: Tuple[int, ...] = (), @@ -52,7 +52,7 @@ def __init__( self._refresh_request_body = refresh_request_body self._grant_type = grant_type - self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) + self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) # type: ignore [no-untyped-call] self._token_expiry_date_format = token_expiry_date_format self._token_expiry_is_time_of_expiration = token_expiry_is_time_of_expiration self._access_token = None @@ -75,14 +75,14 @@ def get_refresh_token(self) -> str: def get_access_token_name(self) -> str: return self._access_token_name - def get_scopes(self) -> [str]: - return self._scopes + def get_scopes(self) -> list[str]: + return self._scopes # type: ignore [return-value] def get_expires_in_name(self) -> str: return self._expires_in_name def get_refresh_request_body(self) -> Mapping[str, Any]: - return self._refresh_request_body + return self._refresh_request_body # type: ignore [return-value] def get_grant_type(self) -> str: return self._grant_type @@ -90,7 +90,7 @@ def get_grant_type(self) -> str: def get_token_expiry_date(self) -> pendulum.DateTime: return self._token_expiry_date - def set_token_expiry_date(self, value: Union[str, int]): + def set_token_expiry_date(self, value: Union[str, int]) -> None: self._token_expiry_date = self._parse_token_expiration_date(value) @property @@ -103,11 +103,11 @@ def token_expiry_date_format(self) -> Optional[str]: @property def access_token(self) -> str: - return self._access_token + return self._access_token # type: ignore [return-value] @access_token.setter - def access_token(self, value: str): - self._access_token = value + def access_token(self, value: str) -> None: + self._access_token = value # type: ignore [assignment] # Incorrect type for assignment class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator): @@ -124,11 +124,11 @@ def __init__( self, connector_config: Mapping[str, Any], token_refresh_endpoint: str, - scopes: List[str] = None, + scopes: List[str] | None = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", refresh_token_name: str = "refresh_token", - refresh_request_body: Mapping[str, Any] = None, + refresh_request_body: Mapping[str, Any] | None = None, grant_type: str = "refresh_token", client_id: Optional[str] = None, client_secret: Optional[str] = None, @@ -162,14 +162,17 @@ def __init__( message_repository (MessageRepository): the message repository used to emit logs on HTTP requests and control message on config update """ self._client_id = ( - client_id + client_id # type: ignore [assignment] # Incorrect type for assignment if client_id is not None - else dpath.get(connector_config, ("credentials", "client_id")) + else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore [arg-type] ) self._client_secret = ( - client_secret + client_secret # type: ignore [assignment] # Incorrect type for assignment if client_secret is not None - else dpath.get(connector_config, ("credentials", "client_secret")) + else dpath.get( + connector_config, # type: ignore [arg-type] + ("credentials", "client_secret"), + ) ) self._access_token_config_path = access_token_config_path self._refresh_token_config_path = refresh_token_config_path @@ -207,27 +210,50 @@ def get_client_secret(self) -> str: @property def access_token(self) -> str: - return dpath.get(self._connector_config, self._access_token_config_path, default="") + return dpath.get( # type: ignore [return-value] + self._connector_config, # type: ignore [arg-type] + self._access_token_config_path, + default="", + ) @access_token.setter - def access_token(self, new_access_token: str): - dpath.new(self._connector_config, self._access_token_config_path, new_access_token) + def access_token(self, new_access_token: str) -> None: + dpath.new( + self._connector_config, # type: ignore [arg-type] + self._access_token_config_path, + new_access_token, + ) def get_refresh_token(self) -> str: - return dpath.get(self._connector_config, self._refresh_token_config_path, default="") + return dpath.get( # type: ignore [return-value] + self._connector_config, # type: ignore [arg-type] + self._refresh_token_config_path, + default="", + ) - def set_refresh_token(self, new_refresh_token: str): - dpath.new(self._connector_config, self._refresh_token_config_path, new_refresh_token) + def set_refresh_token(self, new_refresh_token: str) -> None: + dpath.new( + self._connector_config, # type: ignore [arg-type] + self._refresh_token_config_path, + new_refresh_token, + ) def get_token_expiry_date(self) -> pendulum.DateTime: expiry_date = dpath.get( - self._connector_config, self._token_expiry_date_config_path, default="" + self._connector_config, # type: ignore [arg-type] + self._token_expiry_date_config_path, + default="", ) - return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) + return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) # type: ignore [arg-type, return-value, no-untyped-call] - def set_token_expiry_date(self, new_token_expiry_date): + def set_token_expiry_date( # type: ignore[override] + self, + new_token_expiry_date: pendulum.DateTime, + ) -> None: dpath.new( - self._connector_config, self._token_expiry_date_config_path, str(new_token_expiry_date) + self._connector_config, # type: ignore [arg-type] + self._token_expiry_date_config_path, + str(new_token_expiry_date), ) def token_has_expired(self) -> bool: @@ -236,7 +262,8 @@ def token_has_expired(self) -> bool: @staticmethod def get_new_token_expiry_date( - access_token_expires_in: str, token_expiry_date_format: str = None + access_token_expires_in: str, + token_expiry_date_format: str | None = None, ) -> pendulum.DateTime: if token_expiry_date_format: return pendulum.from_format(access_token_expires_in, token_expiry_date_format) @@ -253,7 +280,7 @@ def get_access_token(self) -> str: new_access_token, access_token_expires_in, new_refresh_token = ( self.refresh_access_token() ) - new_token_expiry_date = self.get_new_token_expiry_date( + new_token_expiry_date: pendulum.DateTime = self.get_new_token_expiry_date( access_token_expires_in, self._token_expiry_date_format ) self.access_token = new_access_token @@ -264,13 +291,15 @@ def get_access_token(self) -> str: # message directly in the console, this is needed if not isinstance(self._message_repository, NoopMessageRepository): self._message_repository.emit_message( - create_connector_config_control_message(self._connector_config) + create_connector_config_control_message(self._connector_config) # type: ignore [arg-type] ) else: - emit_configuration_as_airbyte_control_message(self._connector_config) + emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore [arg-type] return self.access_token - def refresh_access_token(self) -> Tuple[str, str, str]: + def refresh_access_token( # type: ignore[override] # Signature doesn't match base class + self, + ) -> Tuple[str, str, str]: response_json = self._get_refresh_access_token_response() return ( response_json[self.get_access_token_name()], diff --git a/airbyte_cdk/sources/utils/record_helper.py b/airbyte_cdk/sources/utils/record_helper.py index e45601c22..3d2cbcecf 100644 --- a/airbyte_cdk/sources/utils/record_helper.py +++ b/airbyte_cdk/sources/utils/record_helper.py @@ -35,7 +35,7 @@ def stream_data_to_airbyte_message( # need it to normalize values against json schema. By default no action # taken unless configured. See # docs/connector-development/cdk-python/schemas.md for details. - transformer.transform(data, schema) # type: ignore + transformer.transform(data, schema) if is_file_transfer_message: message = AirbyteFileTransferRecordMessage( stream=stream_name, file=data, emitted_at=now_millis, data={} diff --git a/airbyte_cdk/sources/utils/schema_helpers.py b/airbyte_cdk/sources/utils/schema_helpers.py index b8d2507c6..f15578238 100644 --- a/airbyte_cdk/sources/utils/schema_helpers.py +++ b/airbyte_cdk/sources/utils/schema_helpers.py @@ -194,7 +194,7 @@ class InternalConfig(BaseModel): def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]: kwargs["by_alias"] = True kwargs["exclude_unset"] = True - return super().dict(*args, **kwargs) # type: ignore[no-any-return] + return super().dict(*args, **kwargs) def is_limit_reached(self, records_counter: int) -> bool: """ diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index ef52c5fd3..d6885e8c3 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -5,9 +5,9 @@ import logging from distutils.util import strtobool from enum import Flag, auto -from typing import Any, Callable, Dict, Mapping, Optional +from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast -from jsonschema import Draft7Validator, ValidationError, validators +from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators json_to_python_simple = { "string": str, @@ -30,7 +30,7 @@ class TransformConfig(Flag): ``` """ - # No action taken, default behaviour. Cannot be combined with any other options. + # No action taken, default behavior. Cannot be combined with any other options. NoTransform = auto() # Applies default type casting with default_convert method which converts # values by applying simple type casting to specified jsonschema type. @@ -67,15 +67,15 @@ def __init__(self, config: TransformConfig): ) def registerCustomTransform( - self, normalization_callback: Callable[[Any, Dict[str, Any]], Any] - ) -> Callable: + self, normalization_callback: Callable[[Any, dict[str, Any]], Any] + ) -> Callable[[Any, dict[str, Any]], Any]: """ Register custom normalization callback. :param normalization_callback function to be used for value normalization. Takes original value and part type schema. Should return normalized value. See docs/connector-development/cdk-python/schemas.md for details. - :return Same callbeck, this is usefull for using registerCustomTransform function as decorator. + :return Same callback, this is useful for using registerCustomTransform function as decorator. """ if TransformConfig.CustomSchemaNormalization not in self._config: raise Exception( @@ -141,7 +141,11 @@ def default_convert(original_item: Any, subschema: Dict[str, Any]) -> Any: return original_item return original_item - def __get_normalizer(self, schema_key: str, original_validator: Callable): + def __get_normalizer( + self, + schema_key: str, + original_validator: Callable, # type: ignore[type-arg] + ) -> Callable[[Any, Any, Any, dict[str, Any]], Generator[Any, Any, None]]: """ Traverse through object fields using native jsonschema validator and apply normalization function. :param schema_key related json schema key that currently being validated/normalized. @@ -149,8 +153,11 @@ def __get_normalizer(self, schema_key: str, original_validator: Callable): """ def normalizator( - validator_instance: Callable, property_value: Any, instance: Any, schema: Dict[str, Any] - ): + validator_instance: Validator, + property_value: Any, + instance: Any, + schema: Dict[str, Any], + ) -> Generator[Any, Any, None]: """ Jsonschema validator callable it uses for validating instance. We override default Draft7Validator to perform value transformation @@ -163,10 +170,13 @@ def normalizator( : """ - def resolve(subschema): + def resolve(subschema: dict[str, Any]) -> dict[str, Any]: if "$ref" in subschema: - _, resolved = validator_instance.resolver.resolve(subschema["$ref"]) - return resolved + _, resolved = cast( + RefResolver, + validator_instance.resolver, + ).resolve(subschema["$ref"]) + return cast(dict[str, Any], resolved) return subschema # Transform object and array values before running json schema type checking for each element. @@ -185,11 +195,20 @@ def resolve(subschema): instance[index] = self.__normalize(item, subschema) # Running native jsonschema traverse algorithm after field normalization is done. - yield from original_validator(validator_instance, property_value, instance, schema) + yield from original_validator( + validator_instance, + property_value, + instance, + schema, + ) return normalizator - def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]): + def transform( + self, + record: Dict[str, Any], + schema: Mapping[str, Any], + ) -> None: """ Normalize and validate according to config. :param record: record instance for normalization/transformation. All modification are done by modifying existent object. @@ -201,7 +220,7 @@ def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]): for e in normalizer.iter_errors(record): """ just calling normalizer.validate() would throw an exception on - first validation occurences and stop processing rest of schema. + first validation occurrences and stop processing rest of schema. """ logger.warning(self.get_error_message(e)) diff --git a/airbyte_cdk/test/entrypoint_wrapper.py b/airbyte_cdk/test/entrypoint_wrapper.py index 847570cd6..f8e85bfb0 100644 --- a/airbyte_cdk/test/entrypoint_wrapper.py +++ b/airbyte_cdk/test/entrypoint_wrapper.py @@ -23,7 +23,7 @@ from pathlib import Path from typing import Any, List, Mapping, Optional, Union -from orjson import orjson +import orjson from pydantic import ValidationError as V2ValidationError from serpyco_rs import SchemaValidationError @@ -63,7 +63,7 @@ def __init__(self, messages: List[str], uncaught_exception: Optional[BaseExcepti @staticmethod def _parse_message(message: str) -> AirbyteMessage: try: - return AirbyteMessageSerializer.load(orjson.loads(message)) # type: ignore[no-any-return] # Serializer.load() always returns AirbyteMessage + return AirbyteMessageSerializer.load(orjson.loads(message)) except (orjson.JSONDecodeError, SchemaValidationError): # The platform assumes that logs that are not of AirbyteMessage format are log messages return AirbyteMessage( @@ -129,14 +129,19 @@ def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> List[Airbyt return [ message for message in self._get_message_by_types([Type.TRACE]) - if message.trace.type == trace_type - ] # type: ignore[union-attr] # trace has `type` + if message.trace.type == trace_type # type: ignore[union-attr] # trace has `type` + ] def is_in_logs(self, pattern: str) -> bool: """Check if any log message case-insensitive matches the pattern.""" return any( - re.search(pattern, entry.log.message, flags=re.IGNORECASE) for entry in self.logs - ) # type: ignore[union-attr] # log has `message` + re.search( + pattern, + entry.log.message, # type: ignore[union-attr] # log has `message` + flags=re.IGNORECASE, + ) + for entry in self.logs + ) def is_not_in_logs(self, pattern: str) -> bool: """Check if no log message matches the case-insensitive pattern.""" diff --git a/airbyte_cdk/test/mock_http/response_builder.py b/airbyte_cdk/test/mock_http/response_builder.py index b517343e6..7f9583827 100644 --- a/airbyte_cdk/test/mock_http/response_builder.py +++ b/airbyte_cdk/test/mock_http/response_builder.py @@ -183,7 +183,7 @@ def build(self) -> HttpResponse: def _get_unit_test_folder(execution_folder: str) -> FilePath: # FIXME: This function should be removed after the next CDK release to avoid breaking amazon-seller-partner test code. - return get_unit_test_folder(execution_folder) # type: ignore # get_unit_test_folder is known to return a FilePath + return get_unit_test_folder(execution_folder) def find_template(resource: str, execution_folder: str) -> Dict[str, Any]: diff --git a/airbyte_cdk/utils/airbyte_secrets_utils.py b/airbyte_cdk/utils/airbyte_secrets_utils.py index 45279e573..bb5a6be59 100644 --- a/airbyte_cdk/utils/airbyte_secrets_utils.py +++ b/airbyte_cdk/utils/airbyte_secrets_utils.py @@ -47,7 +47,7 @@ def get_secrets( result = [] for path in secret_paths: try: - result.append(dpath.get(config, path)) + result.append(dpath.get(config, path)) # type: ignore # dpath expect MutableMapping but doesn't need it except KeyError: # Since we try to get paths to all known secrets in the spec, in the case of oneOfs, some secret fields may not be present # In that case, a KeyError is thrown. This is expected behavior. diff --git a/airbyte_cdk/utils/event_timing.py b/airbyte_cdk/utils/event_timing.py index 447543ec0..3f489c096 100644 --- a/airbyte_cdk/utils/event_timing.py +++ b/airbyte_cdk/utils/event_timing.py @@ -7,7 +7,7 @@ import time from contextlib import contextmanager from dataclasses import dataclass, field -from typing import Optional +from typing import Any, Generator, Literal, Optional logger = logging.getLogger("airbyte") @@ -18,13 +18,13 @@ class EventTimer: Event nesting follows a LIFO pattern, so finish will apply to the last started event. """ - def __init__(self, name): + def __init__(self, name: str) -> None: self.name = name - self.events = {} + self.events: dict[str, Any] = {} self.count = 0 - self.stack = [] + self.stack: list[Any] = [] - def start_event(self, name): + def start_event(self, name: str) -> None: """ Start a new event and push it to the stack. """ @@ -32,7 +32,7 @@ def start_event(self, name): self.count += 1 self.stack.insert(0, self.events[name]) - def finish_event(self): + def finish_event(self) -> None: """ Finish the current event and pop it from the stack. """ @@ -43,7 +43,7 @@ def finish_event(self): else: logger.warning(f"{self.name} finish_event called without start_event") - def report(self, order_by="name"): + def report(self, order_by: Literal["name", "duration"] = "name") -> str: """ :param order_by: 'name' or 'duration' """ @@ -69,15 +69,15 @@ def duration(self) -> float: return (self.end - self.start) / 1e9 return float("+inf") - def __str__(self): + def __str__(self) -> str: return f"{self.name} {datetime.timedelta(seconds=self.duration)}" - def finish(self): + def finish(self) -> None: self.end = time.perf_counter_ns() @contextmanager -def create_timer(name): +def create_timer(name: str) -> Generator[EventTimer, Any, None]: """ Creates a new EventTimer as a context manager to improve code readability. """ diff --git a/airbyte_cdk/utils/message_utils.py b/airbyte_cdk/utils/message_utils.py index 7e740b788..148b19ae2 100644 --- a/airbyte_cdk/utils/message_utils.py +++ b/airbyte_cdk/utils/message_utils.py @@ -8,15 +8,16 @@ def get_stream_descriptor(message: AirbyteMessage) -> HashableStreamDescriptor: match message.type: case Type.RECORD: return HashableStreamDescriptor( - name=message.record.stream, namespace=message.record.namespace - ) # type: ignore[union-attr] # record has `stream` and `namespace` + name=message.record.stream, # type: ignore[union-attr] # record has `stream` + namespace=message.record.namespace, # type: ignore[union-attr] # record has `namespace` + ) case Type.STATE: if not message.state.stream or not message.state.stream.stream_descriptor: # type: ignore[union-attr] # state has `stream` raise ValueError( "State message was not in per-stream state format, which is required for record counts." ) return HashableStreamDescriptor( - name=message.state.stream.stream_descriptor.name, + name=message.state.stream.stream_descriptor.name, # type: ignore[union-attr] # state has `stream` namespace=message.state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # state has `stream` ) case _: diff --git a/airbyte_cdk/utils/spec_schema_transformations.py b/airbyte_cdk/utils/spec_schema_transformations.py index 8d47f83e5..fdc41a541 100644 --- a/airbyte_cdk/utils/spec_schema_transformations.py +++ b/airbyte_cdk/utils/spec_schema_transformations.py @@ -4,11 +4,12 @@ import json import re +from typing import Any from jsonschema import RefResolver -def resolve_refs(schema: dict) -> dict: +def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]: """ For spec schemas generated using Pydantic models, the resulting JSON schema can contain refs between object relationships. @@ -20,6 +21,6 @@ def resolve_refs(schema: dict) -> dict: str_schema = str_schema.replace( ref_block, json.dumps(json_schema_ref_resolver.resolve(ref)[1]) ) - pyschema: dict = json.loads(str_schema) + pyschema: dict[str, Any] = json.loads(str_schema) del pyschema["definitions"] return pyschema diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index 75b6eab16..59dbab2a5 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -3,9 +3,9 @@ # import time import traceback -from typing import Optional +from typing import Any, Optional -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteConnectionStatus, @@ -104,9 +104,9 @@ def from_exception( cls, exc: BaseException, stream_descriptor: Optional[StreamDescriptor] = None, - *args, - **kwargs, - ) -> "AirbyteTracedException": # type: ignore # ignoring because of args and kwargs + *args: Any, + **kwargs: Any, + ) -> "AirbyteTracedException": """ Helper to create an AirbyteTracedException from an existing exception :param exc: the exception that caused the error @@ -131,13 +131,15 @@ def as_sanitized_airbyte_message( """ error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor) if error_message.trace.error.message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - error_message.trace.error.message = filter_secrets(error_message.trace.error.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.message = filter_secrets( # type: ignore[union-attr] + error_message.trace.error.message, # type: ignore[union-attr] + ) if error_message.trace.error.internal_message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - error_message.trace.error.internal_message = filter_secrets( - error_message.trace.error.internal_message - ) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.internal_message = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.internal_message # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + ) if error_message.trace.error.stack_trace: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - error_message.trace.error.stack_trace = filter_secrets( - error_message.trace.error.stack_trace - ) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.stack_trace = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.stack_trace # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + ) return error_message diff --git a/bin/run-mypy-on-modified-files.sh b/bin/run-mypy-on-modified-files.sh deleted file mode 100755 index 537e3c843..000000000 --- a/bin/run-mypy-on-modified-files.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env sh - -set -e - -# Ensure script always runs from the project directory. -cd "$(dirname "${0}")/.." || exit 1 - -# TODO change this to include unit_tests as well once it's in a good state -{ - git diff --name-only --diff-filter=d --relative ':(exclude)unit_tests' - git diff --name-only --diff-filter=d --staged --relative ':(exclude)unit_tests' - git diff --name-only --diff-filter=d main... --relative ':(exclude)unit_tests' -} | grep -E '\.py$' | sort | uniq | xargs mypy --config-file mypy.ini --install-types --non-interactive diff --git a/poetry.lock b/poetry.lock index 0fef0a902..b7b9736bf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -930,23 +930,6 @@ files = [ marshmallow = ">=3.18.0,<4.0.0" typing-inspect = ">=0.4.0,<1" -[[package]] -name = "deprecated" -version = "1.2.15" -description = "Python @deprecated decorator to deprecate old python classes, functions or methods." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" -files = [ - {file = "Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320"}, - {file = "deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d"}, -] - -[package.dependencies] -wrapt = ">=1.10,<2" - -[package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "jinja2 (>=3.0.3,<3.1.0)", "setuptools", "sphinx (<2)", "tox"] - [[package]] name = "docutils" version = "0.17.1" @@ -4279,11 +4262,6 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, - {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, @@ -4878,6 +4856,57 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "types-cachetools" +version = "5.5.0.20240820" +description = "Typing stubs for cachetools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-cachetools-5.5.0.20240820.tar.gz", hash = "sha256:b888ab5c1a48116f7799cd5004b18474cd82b5463acb5ffb2db2fc9c7b053bc0"}, + {file = "types_cachetools-5.5.0.20240820-py3-none-any.whl", hash = "sha256:efb2ed8bf27a4b9d3ed70d33849f536362603a90b8090a328acf0cd42fda82e2"}, +] + +[[package]] +name = "types-cffi" +version = "1.16.0.20240331" +description = "Typing stubs for cffi" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-cffi-1.16.0.20240331.tar.gz", hash = "sha256:b8b20d23a2b89cfed5f8c5bc53b0cb8677c3aac6d970dbc771e28b9c698f5dee"}, + {file = "types_cffi-1.16.0.20240331-py3-none-any.whl", hash = "sha256:a363e5ea54a4eb6a4a105d800685fde596bc318089b025b27dee09849fe41ff0"}, +] + +[package.dependencies] +types-setuptools = "*" + +[[package]] +name = "types-pyopenssl" +version = "24.1.0.20240722" +description = "Typing stubs for pyOpenSSL" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-pyOpenSSL-24.1.0.20240722.tar.gz", hash = "sha256:47913b4678a01d879f503a12044468221ed8576263c1540dcb0484ca21b08c39"}, + {file = "types_pyOpenSSL-24.1.0.20240722-py3-none-any.whl", hash = "sha256:6a7a5d2ec042537934cfb4c9d4deb0e16c4c6250b09358df1f083682fe6fda54"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-cffi = "*" + +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20241003" +description = "Typing stubs for python-dateutil" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, + {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, +] + [[package]] name = "types-pytz" version = "2024.2.0.20241003" @@ -4889,6 +4918,68 @@ files = [ {file = "types_pytz-2024.2.0.20241003-py3-none-any.whl", hash = "sha256:3e22df1336c0c6ad1d29163c8fda82736909eb977281cb823c57f8bae07118b7"}, ] +[[package]] +name = "types-pyyaml" +version = "6.0.12.20240917" +description = "Typing stubs for PyYAML" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-PyYAML-6.0.12.20240917.tar.gz", hash = "sha256:d1405a86f9576682234ef83bcb4e6fff7c9305c8b1fbad5e0bcd4f7dbdc9c587"}, + {file = "types_PyYAML-6.0.12.20240917-py3-none-any.whl", hash = "sha256:392b267f1c0fe6022952462bf5d6523f31e37f6cea49b14cee7ad634b6301570"}, +] + +[[package]] +name = "types-redis" +version = "4.6.0.20241004" +description = "Typing stubs for redis" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e"}, + {file = "types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-pyOpenSSL = "*" + +[[package]] +name = "types-requests" +version = "2.32.0.20241016" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95"}, + {file = "types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747"}, +] + +[package.dependencies] +urllib3 = ">=2" + +[[package]] +name = "types-setuptools" +version = "75.6.0.20241126" +description = "Typing stubs for setuptools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types_setuptools-75.6.0.20241126-py3-none-any.whl", hash = "sha256:aaae310a0e27033c1da8457d4d26ac673b0c8a0de7272d6d4708e263f2ea3b9b"}, + {file = "types_setuptools-75.6.0.20241126.tar.gz", hash = "sha256:7bf25ad4be39740e469f9268b6beddda6e088891fa5a27e985c6ce68bf62ace0"}, +] + +[[package]] +name = "types-ujson" +version = "5.10.0.20240515" +description = "Typing stubs for ujson" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-ujson-5.10.0.20240515.tar.gz", hash = "sha256:ceae7127f0dafe4af5dd0ecf98ee13e9d75951ef963b5c5a9b7ea92e0d71f0d7"}, + {file = "types_ujson-5.10.0.20240515-py3-none-any.whl", hash = "sha256:02bafc36b3a93d2511757a64ff88bd505e0a57fba08183a9150fbcfcb2015310"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -5097,80 +5188,6 @@ MarkupSafe = ">=2.1.1" [package.extras] watchdog = ["watchdog (>=2.3)"] -[[package]] -name = "wrapt" -version = "1.17.0" -description = "Module for decorators, wrappers and monkey patching." -optional = false -python-versions = ">=3.8" -files = [ - {file = "wrapt-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a0c23b8319848426f305f9cb0c98a6e32ee68a36264f45948ccf8e7d2b941f8"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ca5f060e205f72bec57faae5bd817a1560fcfc4af03f414b08fa29106b7e2d"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e185ec6060e301a7e5f8461c86fb3640a7beb1a0f0208ffde7a65ec4074931df"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb90765dd91aed05b53cd7a87bd7f5c188fcd95960914bae0d32c5e7f899719d"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:879591c2b5ab0a7184258274c42a126b74a2c3d5a329df16d69f9cee07bba6ea"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fce6fee67c318fdfb7f285c29a82d84782ae2579c0e1b385b7f36c6e8074fffb"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0698d3a86f68abc894d537887b9bbf84d29bcfbc759e23f4644be27acf6da301"}, - {file = "wrapt-1.17.0-cp310-cp310-win32.whl", hash = "sha256:69d093792dc34a9c4c8a70e4973a3361c7a7578e9cd86961b2bbf38ca71e4e22"}, - {file = "wrapt-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:f28b29dc158ca5d6ac396c8e0a2ef45c4e97bb7e65522bfc04c989e6fe814575"}, - {file = "wrapt-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74bf625b1b4caaa7bad51d9003f8b07a468a704e0644a700e936c357c17dd45a"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f2a28eb35cf99d5f5bd12f5dd44a0f41d206db226535b37b0c60e9da162c3ed"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81b1289e99cf4bad07c23393ab447e5e96db0ab50974a280f7954b071d41b489"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2939cd4a2a52ca32bc0b359015718472d7f6de870760342e7ba295be9ebaf9"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a9653131bda68a1f029c52157fd81e11f07d485df55410401f745007bd6d339"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4e4b4385363de9052dac1a67bfb535c376f3d19c238b5f36bddc95efae15e12d"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bdf62d25234290db1837875d4dceb2151e4ea7f9fff2ed41c0fde23ed542eb5b"}, - {file = "wrapt-1.17.0-cp311-cp311-win32.whl", hash = "sha256:5d8fd17635b262448ab8f99230fe4dac991af1dabdbb92f7a70a6afac8a7e346"}, - {file = "wrapt-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:92a3d214d5e53cb1db8b015f30d544bc9d3f7179a05feb8f16df713cecc2620a"}, - {file = "wrapt-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:89fc28495896097622c3fc238915c79365dd0ede02f9a82ce436b13bd0ab7569"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875d240fdbdbe9e11f9831901fb8719da0bd4e6131f83aa9f69b96d18fae7504"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ed16d95fd142e9c72b6c10b06514ad30e846a0d0917ab406186541fe68b451"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18b956061b8db634120b58f668592a772e87e2e78bc1f6a906cfcaa0cc7991c1"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:daba396199399ccabafbfc509037ac635a6bc18510ad1add8fd16d4739cdd106"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4d63f4d446e10ad19ed01188d6c1e1bb134cde8c18b0aa2acfd973d41fcc5ada"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8a5e7cc39a45fc430af1aefc4d77ee6bad72c5bcdb1322cfde852c15192b8bd4"}, - {file = "wrapt-1.17.0-cp312-cp312-win32.whl", hash = "sha256:0a0a1a1ec28b641f2a3a2c35cbe86c00051c04fffcfcc577ffcdd707df3f8635"}, - {file = "wrapt-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:3c34f6896a01b84bab196f7119770fd8466c8ae3dfa73c59c0bb281e7b588ce7"}, - {file = "wrapt-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:714c12485aa52efbc0fc0ade1e9ab3a70343db82627f90f2ecbc898fdf0bb181"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da427d311782324a376cacb47c1a4adc43f99fd9d996ffc1b3e8529c4074d393"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba1739fb38441a27a676f4de4123d3e858e494fac05868b7a281c0a383c098f4"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e711fc1acc7468463bc084d1b68561e40d1eaa135d8c509a65dd534403d83d7b"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:140ea00c87fafc42739bd74a94a5a9003f8e72c27c47cd4f61d8e05e6dec8721"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73a96fd11d2b2e77d623a7f26e004cc31f131a365add1ce1ce9a19e55a1eef90"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0b48554952f0f387984da81ccfa73b62e52817a4386d070c75e4db7d43a28c4a"}, - {file = "wrapt-1.17.0-cp313-cp313-win32.whl", hash = "sha256:498fec8da10e3e62edd1e7368f4b24aa362ac0ad931e678332d1b209aec93045"}, - {file = "wrapt-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd136bb85f4568fffca995bd3c8d52080b1e5b225dbf1c2b17b66b4c5fa02838"}, - {file = "wrapt-1.17.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:17fcf043d0b4724858f25b8826c36e08f9fb2e475410bece0ec44a22d533da9b"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4a557d97f12813dc5e18dad9fa765ae44ddd56a672bb5de4825527c847d6379"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0229b247b0fc7dee0d36176cbb79dbaf2a9eb7ecc50ec3121f40ef443155fb1d"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8425cfce27b8b20c9b89d77fb50e368d8306a90bf2b6eef2cdf5cd5083adf83f"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9c900108df470060174108012de06d45f514aa4ec21a191e7ab42988ff42a86c"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:4e547b447073fc0dbfcbff15154c1be8823d10dab4ad401bdb1575e3fdedff1b"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:914f66f3b6fc7b915d46c1cc424bc2441841083de01b90f9e81109c9759e43ab"}, - {file = "wrapt-1.17.0-cp313-cp313t-win32.whl", hash = "sha256:a4192b45dff127c7d69b3bdfb4d3e47b64179a0b9900b6351859f3001397dabf"}, - {file = "wrapt-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4f643df3d4419ea3f856c5c3f40fec1d65ea2e89ec812c83f7767c8730f9827a"}, - {file = "wrapt-1.17.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:69c40d4655e078ede067a7095544bcec5a963566e17503e75a3a3e0fe2803b13"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f495b6754358979379f84534f8dd7a43ff8cff2558dcdea4a148a6e713a758f"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa7ef4e0886a6f482e00d1d5bcd37c201b383f1d314643dfb0367169f94f04c"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8fc931382e56627ec4acb01e09ce66e5c03c384ca52606111cee50d931a342d"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8f8909cdb9f1b237786c09a810e24ee5e15ef17019f7cecb207ce205b9b5fcce"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ad47b095f0bdc5585bced35bd088cbfe4177236c7df9984b3cc46b391cc60627"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:948a9bd0fb2c5120457b07e59c8d7210cbc8703243225dbd78f4dfc13c8d2d1f"}, - {file = "wrapt-1.17.0-cp38-cp38-win32.whl", hash = "sha256:5ae271862b2142f4bc687bdbfcc942e2473a89999a54231aa1c2c676e28f29ea"}, - {file = "wrapt-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:f335579a1b485c834849e9075191c9898e0731af45705c2ebf70e0cd5d58beed"}, - {file = "wrapt-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d751300b94e35b6016d4b1e7d0e7bbc3b5e1751e2405ef908316c2a9024008a1"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7264cbb4a18dc4acfd73b63e4bcfec9c9802614572025bdd44d0721983fc1d9c"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33539c6f5b96cf0b1105a0ff4cf5db9332e773bb521cc804a90e58dc49b10578"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c30970bdee1cad6a8da2044febd824ef6dc4cc0b19e39af3085c763fdec7de33"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bc7f729a72b16ee21795a943f85c6244971724819819a41ddbaeb691b2dd85ad"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6ff02a91c4fc9b6a94e1c9c20f62ea06a7e375f42fe57587f004d1078ac86ca9"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dfb7cff84e72e7bf975b06b4989477873dcf160b2fd89959c629535df53d4e0"}, - {file = "wrapt-1.17.0-cp39-cp39-win32.whl", hash = "sha256:2399408ac33ffd5b200480ee858baa58d77dd30e0dd0cab6a8a9547135f30a88"}, - {file = "wrapt-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:4f763a29ee6a20c529496a20a7bcb16a73de27f5da6a843249c7047daf135977"}, - {file = "wrapt-1.17.0-py3-none-any.whl", hash = "sha256:d2c63b93548eda58abf5188e505ffed0229bf675f7c3090f8e36ad55b8cbc371"}, - {file = "wrapt-1.17.0.tar.gz", hash = "sha256:16187aa2317c731170a88ef35e8937ae0f533c402872c1ee5e6d079fcf320801"}, -] - [[package]] name = "xlsxwriter" version = "3.2.0" @@ -5317,4 +5334,4 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.13" -content-hash = "1ec99125d31558fbc2e78f3aa109da6d35bc476a39af0e0cdf77a6d357f1b214" +content-hash = "e0ef863b4ed5381ffe0ae40e03c327394611d3cbde70fc81e027aaa1d1db3f27" diff --git a/pyproject.toml b/pyproject.toml index 8713fdbf5..8c81a42e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,6 @@ python = "^3.10,<3.13" airbyte-protocol-models-dataclasses = "^0.14" backoff = "*" cachetools = "*" -Deprecated = "~1.2" dpath = "^2.1.6" dunamai = "^1.22.0" genson = "1.3.0" @@ -99,6 +98,11 @@ pytest-cov = "*" pytest-httpserver = "*" pytest-mock = "*" requests-mock = "*" +# Stubs packages for mypy typing +types-requests = "^2.32.0.20241016" +types-python-dateutil = "^2.9.0.20241003" +types-pyyaml = "^6.0.12.20240917" +types-cachetools = "^5.5.0.20240820" [tool.poetry.extras] file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"] @@ -140,7 +144,7 @@ format-fix = {sequence = ["_format-fix-ruff", "_format-fix-prettier"], help = "F # Linting/Typing check tasks _lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} -type-check = {cmd = "bin/run-mypy-on-modified-files.sh", help = "Type check modified files with mypy."} +type-check = {cmd = "poetry run mypy airbyte_cdk", help = "Type check modified files with mypy."} lint = {sequence = ["_lint-ruff", "type-check"], help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero"} # Lockfile check task diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 234a6e9cd..aac00a889 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -11,9 +11,9 @@ from unittest import mock from unittest.mock import MagicMock, patch +import orjson import pytest import requests -from orjson import orjson from airbyte_cdk import connector_builder from airbyte_cdk.connector_builder.connector_builder_handler import ( diff --git a/unit_tests/connector_builder/test_message_grouper.py b/unit_tests/connector_builder/test_message_grouper.py index bf63f0555..c3fc73308 100644 --- a/unit_tests/connector_builder/test_message_grouper.py +++ b/unit_tests/connector_builder/test_message_grouper.py @@ -6,8 +6,8 @@ from typing import Any, Iterator, List, Mapping from unittest.mock import MagicMock, Mock, patch +import orjson import pytest -from orjson import orjson from airbyte_cdk.connector_builder.message_grouper import MessageGrouper from airbyte_cdk.connector_builder.models import ( diff --git a/unit_tests/destinations/test_destination.py b/unit_tests/destinations/test_destination.py index 4294192e6..14f52be15 100644 --- a/unit_tests/destinations/test_destination.py +++ b/unit_tests/destinations/test_destination.py @@ -6,11 +6,12 @@ import io import json from os import PathLike +from pathlib import Path from typing import Any, Dict, Iterable, List, Mapping, Union from unittest.mock import ANY +import orjson import pytest -from orjson import orjson from airbyte_cdk.destinations import Destination from airbyte_cdk.destinations import destination as destination_module @@ -129,7 +130,7 @@ class OrderedIterableMatcher(Iterable): in an ordered fashion """ - def attempt_consume(self, iterator): + def attempt_consume(self, iterator) -> Any | None: try: return next(iterator) except StopIteration: @@ -138,7 +139,7 @@ def attempt_consume(self, iterator): def __iter__(self): return iter(self.iterable) - def __init__(self, iterable: Iterable): + def __init__(self, iterable: Iterable) -> None: self.iterable = iterable def __eq__(self, other): @@ -149,7 +150,11 @@ def __eq__(self, other): class TestRun: - def test_run_initializes_exception_handler(self, mocker, destination: Destination): + def test_run_initializes_exception_handler( + self, + mocker, + destination: Destination, + ) -> None: mocker.patch.object(destination_module, "init_uncaught_exception_handler") mocker.patch.object(destination, "parse_args") mocker.patch.object(destination, "run_cmd") @@ -158,7 +163,11 @@ def test_run_initializes_exception_handler(self, mocker, destination: Destinatio destination_module.logger ) - def test_run_spec(self, mocker, destination: Destination): + def test_run_spec( + self, + mocker, + destination: Destination, + ) -> None: args = {"command": "spec"} parsed_args = argparse.Namespace(**args) @@ -203,7 +212,12 @@ def test_run_check(self, mocker, destination: Destination, tmp_path): # verify output was correct assert returned_check_result == _wrapped(expected_check_result) - def test_run_check_with_invalid_config(self, mocker, destination: Destination, tmp_path): + def test_run_check_with_invalid_config( + self, + mocker, + destination: Destination, + tmp_path: Path, + ) -> None: file_path = tmp_path / "config.json" invalid_config = {"not": "valid"} write_file(file_path, invalid_config) diff --git a/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py b/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py index c9c0dff70..46b726758 100644 --- a/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +++ b/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py @@ -5,7 +5,7 @@ import logging from unittest.mock import MagicMock, patch -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteStateBlob, diff --git a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py index 495ef7345..b65f1f724 100644 --- a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py +++ b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py @@ -4,9 +4,9 @@ from typing import Any, List, Mapping, MutableMapping, Optional, Union from unittest.mock import MagicMock +import orjson import pytest import requests_mock -from orjson import orjson from airbyte_cdk.models import ( AirbyteMessage, diff --git a/unit_tests/sources/declarative/schema/source_test/SourceTest.py b/unit_tests/sources/declarative/schema/source_test/SourceTest.py index 8d6a26cb7..d8b3f64d5 100644 --- a/unit_tests/sources/declarative/schema/source_test/SourceTest.py +++ b/unit_tests/sources/declarative/schema/source_test/SourceTest.py @@ -4,5 +4,5 @@ class SourceTest: - def __init__(self): + def __init__(self) -> None: pass diff --git a/unit_tests/sources/declarative/test_concurrent_declarative_source.py b/unit_tests/sources/declarative/test_concurrent_declarative_source.py index dfaca8ca0..efe6026ba 100644 --- a/unit_tests/sources/declarative/test_concurrent_declarative_source.py +++ b/unit_tests/sources/declarative/test_concurrent_declarative_source.py @@ -10,7 +10,7 @@ import freezegun import isodate import pendulum -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import ( AirbyteMessage, diff --git a/unit_tests/sources/test_source.py b/unit_tests/sources/test_source.py index 60c1ecf57..9554d2242 100644 --- a/unit_tests/sources/test_source.py +++ b/unit_tests/sources/test_source.py @@ -8,8 +8,8 @@ from contextlib import nullcontext as does_not_raise from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union +import orjson import pytest -from orjson import orjson from serpyco_rs import SchemaValidationError from airbyte_cdk.models import ( diff --git a/unit_tests/test/test_entrypoint_wrapper.py b/unit_tests/test/test_entrypoint_wrapper.py index fad3db953..a8d02fca9 100644 --- a/unit_tests/test/test_entrypoint_wrapper.py +++ b/unit_tests/test/test_entrypoint_wrapper.py @@ -7,7 +7,7 @@ from unittest import TestCase from unittest.mock import Mock, patch -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteAnalyticsTraceMessage, diff --git a/unit_tests/test_entrypoint.py b/unit_tests/test_entrypoint.py index a5ffb21fb..2e50c11f9 100644 --- a/unit_tests/test_entrypoint.py +++ b/unit_tests/test_entrypoint.py @@ -11,9 +11,9 @@ from unittest.mock import MagicMock, patch import freezegun +import orjson import pytest import requests -from orjson import orjson from airbyte_cdk import AirbyteEntrypoint from airbyte_cdk import entrypoint as entrypoint_module diff --git a/unit_tests/utils/test_traced_exception.py b/unit_tests/utils/test_traced_exception.py index ac3c4318a..0e5b58439 100644 --- a/unit_tests/utils/test_traced_exception.py +++ b/unit_tests/utils/test_traced_exception.py @@ -3,8 +3,8 @@ # +import orjson import pytest -from orjson import orjson from airbyte_cdk.models import ( AirbyteErrorTraceMessage,