diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 34f3c2d1d08f..04bf047260a1 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.50.31 +current_version = 0.50.32 commit = False tag = False parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-[a-z]+)? diff --git a/.github/actions/run-dagger-pipeline/action.yml b/.github/actions/run-dagger-pipeline/action.yml index 8d413a9c658f..afb0d8e69c1a 100644 --- a/.github/actions/run-dagger-pipeline/action.yml +++ b/.github/actions/run-dagger-pipeline/action.yml @@ -63,6 +63,12 @@ inputs: ci_job_key: description: "CI job key" required: false + s3_build_cache_access_key_id: + description: "Gradle S3 Build Cache AWS access key ID" + required: false + s3_build_cache_secret_key: + description: "Gradle S3 Build Cache AWS secret key" + required: false runs: using: "composite" steps: @@ -120,4 +126,6 @@ runs: SPEC_CACHE_GCS_CREDENTIALS: ${{ inputs.spec_cache_gcs_credentials }} DOCKER_HUB_USERNAME: ${{ inputs.docker_hub_username }} DOCKER_HUB_PASSWORD: ${{ inputs.docker_hub_password }} + S3_BUILD_CACHE_ACCESS_KEY_ID: ${{ inputs.s3_build_cache_access_key_id }} + S3_BUILD_CACHE_SECRET_KEY: ${{ inputs.s3_build_cache_secret_key }} CI: "True" diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml index e34fd9837dd5..c7f7eb7dddf5 100644 --- a/.github/workflows/connectors_nightly_build.yml +++ b/.github/workflows/connectors_nightly_build.yml @@ -41,4 +41,6 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} git_branch: ${{ steps.extract_branch.outputs.branch }} github_token: ${{ secrets.GITHUB_TOKEN }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ inputs.test-connectors-options || '--concurrency=8 --support-level=certified' }} test" diff --git a/.github/workflows/connectors_tests.yml b/.github/workflows/connectors_tests.yml index b5567e9d6ec9..610e4fc94ad1 100644 --- a/.github/workflows/connectors_tests.yml +++ b/.github/workflows/connectors_tests.yml @@ -63,6 +63,8 @@ jobs: git_branch: ${{ steps.extract_branch.outputs.branch }} git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }} github_token: ${{ env.PAT }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ github.event.inputs.test-connectors-options }} test" - name: Test connectors [PULL REQUESTS] if: github.event_name == 'pull_request' @@ -76,4 +78,6 @@ jobs: git_branch: ${{ github.head_ref }} git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }} github_token: ${{ env.PAT }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors --modified test" diff --git a/.github/workflows/publish_connectors.yml b/.github/workflows/publish_connectors.yml index 44a7426548b9..5fdc8dfcde60 100644 --- a/.github/workflows/publish_connectors.yml +++ b/.github/workflows/publish_connectors.yml @@ -40,6 +40,8 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} slack_webhook_url: ${{ secrets.PUBLISH_ON_MERGE_SLACK_WEBHOOK }} spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ 
secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors --concurrency=1 --execute-timeout=3600 --metadata-changes-only publish --main-release" - name: Publish connectors [manual] @@ -57,6 +59,8 @@ jobs: sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }} slack_webhook_url: ${{ secrets.PUBLISH_ON_MERGE_SLACK_WEBHOOK }} spec_cache_gcs_credentials: ${{ secrets.SPEC_CACHE_SERVICE_ACCOUNT_KEY_PUBLISH }} + s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }} + s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }} subcommand: "connectors ${{ github.event.inputs.connectors-options }} publish ${{ github.event.inputs.publish-options }}" set-instatus-incident-on-failure: diff --git a/airbyte-cdk/python/.bumpversion.cfg b/airbyte-cdk/python/.bumpversion.cfg index db7535947639..8ba08b10f517 100644 --- a/airbyte-cdk/python/.bumpversion.cfg +++ b/airbyte-cdk/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.51.41 +current_version = 0.51.44 commit = False [bumpversion:file:setup.py] diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 42e8e1d1c072..f7e9dd4c0165 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## 0.51.44 +low-code: Allow connector developers to specify the type of an added field + +## 0.51.43 +concurrent cdk: fail fast if a partition raises an exception + +## 0.51.42 +File CDK: Avoid listing all files for check command + ## 0.51.41 Vector DB CDK: Expose stream identifier logic, add field remapping to processing | File CDK: Emit analytics message for used streams diff --git a/airbyte-cdk/python/Dockerfile b/airbyte-cdk/python/Dockerfile index 82752cd9fcd4..65f8368f6333 100644 --- a/airbyte-cdk/python/Dockerfile +++ b/airbyte-cdk/python/Dockerfile @@ -10,7 +10,7 @@ RUN apk --no-cache upgrade \ && apk --no-cache add tzdata build-base # install airbyte-cdk -RUN pip install --prefix=/install airbyte-cdk==0.51.41 +RUN pip install --prefix=/install airbyte-cdk==0.51.44 # build a clean environment FROM base @@ -32,5 +32,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] # needs to be the same as CDK -LABEL io.airbyte.version=0.51.41 +LABEL io.airbyte.version=0.51.44 LABEL io.airbyte.name=airbyte/source-declarative-manifest diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 019e0ba80204..f6c516591b6c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -69,6 +69,10 @@ definitions: - "{{ record['updates'] }}" - "{{ record['MetaData']['LastUpdatedTime'] }}" - "{{ stream_partition['segment_id'] }}" + value_type: + title: Value Type + description: Type of the value. If not specified, the type will be inferred from the value. + "$ref": "#/definitions/ValueType" $parameters: type: object additionalProperties: true @@ -1987,6 +1991,15 @@ definitions: $parameters: type: object additionalProperties: true + ValueType: + title: Value Type + description: A schema type. + type: string + enum: + - string + - number + - integer + - boolean WaitTimeFromHeader: title: Wait Time Extracted From Response Header description: Extract wait time from a HTTP header in the response. 
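The `value_type` field and `ValueType` enum added to the schema above let a low-code connector pin the type of an added field instead of relying on type inference; the factory maps `string`, `number`, `integer`, and `boolean` to `str`, `float`, `int`, and `bool`. A minimal sketch of the resulting behaviour, mirroring the `AddFields` unit tests later in this diff (import paths follow the CDK modules touched in this change and are otherwise an assumption):

```python
# Sketch only: how value_type changes what an AddFields transformation writes into a record.
from airbyte_cdk.sources.declarative.transformations import AddFields
from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition

record = {"k": "v"}

# Without value_type, "{{ 1 }}" is evaluated and kept as a number.
untyped = AddFields(
    fields=[AddedFieldDefinition(path=["count"], value="{{ 1 }}", value_type=None, parameters={})],
    parameters={},
)
assert untyped.transform(dict(record)) == {"k": "v", "count": 1}

# With value_type "string" (mapped to str by the factory), the same expression stays "1".
typed = AddFields(
    fields=[AddedFieldDefinition(path=["count"], value="{{ 1 }}", value_type=str, parameters={})],
    parameters={},
)
assert typed.transform(dict(record)) == {"k": "v", "count": "1"}
```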
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 4b7b4f5542a4..aab608bacc35 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -10,37 +10,6 @@ from typing_extensions import Literal -class AddedFieldDefinition(BaseModel): - type: Literal['AddedFieldDefinition'] - path: List[str] = Field( - ..., - description='List of strings defining the path where to add the value on the record.', - examples=[['segment_id'], ['metadata', 'segment_id']], - title='Path', - ) - value: str = Field( - ..., - description="Value of the new field. Use {{ record['existing_field'] }} syntax to refer to other fields in the record.", - examples=[ - "{{ record['updates'] }}", - "{{ record['MetaData']['LastUpdatedTime'] }}", - "{{ stream_partition['segment_id'] }}", - ], - title='Value', - ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') - - -class AddFields(BaseModel): - type: Literal['AddFields'] - fields: List[AddedFieldDefinition] = Field( - ..., - description='List of transformations (path and corresponding value) that will be added to the record.', - title='Fields', - ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') - - class AuthFlowType(Enum): oauth2_0 = 'oauth2.0' oauth1_0 = 'oauth1.0' @@ -694,6 +663,13 @@ class LegacySessionTokenAuthenticator(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') +class ValueType(Enum): + string = 'string' + number = 'number' + integer = 'integer' + boolean = 'boolean' + + class WaitTimeFromHeader(BaseModel): type: Literal['WaitTimeFromHeader'] header: str = Field( @@ -734,6 +710,42 @@ class WaitUntilTimeFromHeader(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') +class AddedFieldDefinition(BaseModel): + type: Literal['AddedFieldDefinition'] + path: List[str] = Field( + ..., + description='List of strings defining the path where to add the value on the record.', + examples=[['segment_id'], ['metadata', 'segment_id']], + title='Path', + ) + value: str = Field( + ..., + description="Value of the new field. Use {{ record['existing_field'] }} syntax to refer to other fields in the record.", + examples=[ + "{{ record['updates'] }}", + "{{ record['MetaData']['LastUpdatedTime'] }}", + "{{ stream_partition['segment_id'] }}", + ], + title='Value', + ) + value_type: Optional[ValueType] = Field( + None, + description='Type of the value. 
If not specified, the type will be inferred from the value.', + title='Value Type', + ) + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + + +class AddFields(BaseModel): + type: Literal['AddFields'] + fields: List[AddedFieldDefinition] = Field( + ..., + description='List of transformations (path and corresponding value) that will be added to the record.', + title='Fields', + ) + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + + class ApiKeyAuthenticator(BaseModel): type: Literal['ApiKeyAuthenticator'] api_token: Optional[str] = Field( diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 763a7e22065f..8ab779dcad6b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -80,6 +80,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import SimpleRetriever as SimpleRetrieverModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import SubstreamPartitionRouter as SubstreamPartitionRouterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType from airbyte_cdk.sources.declarative.models.declarative_component_schema import WaitTimeFromHeader as WaitTimeFromHeaderModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel from airbyte_cdk.sources.declarative.partition_routers import ListPartitionRouter, SinglePartitionRouter, SubstreamPartitionRouter @@ -232,15 +233,36 @@ def _create_component_from_model(self, model: BaseModel, config: Config, **kwarg @staticmethod def create_added_field_definition(model: AddedFieldDefinitionModel, config: Config, **kwargs: Any) -> AddedFieldDefinition: interpolated_value = InterpolatedString.create(model.value, parameters=model.parameters or {}) - return AddedFieldDefinition(path=model.path, value=interpolated_value, parameters=model.parameters or {}) + return AddedFieldDefinition( + path=model.path, + value=interpolated_value, + value_type=ModelToComponentFactory._json_schema_type_name_to_type(model.value_type), + parameters=model.parameters or {}, + ) def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields: added_field_definitions = [ - self._create_component_from_model(model=added_field_definition_model, config=config) + self._create_component_from_model( + model=added_field_definition_model, + value_type=ModelToComponentFactory._json_schema_type_name_to_type(added_field_definition_model.value_type), + config=config, + ) for added_field_definition_model in model.fields ] return AddFields(fields=added_field_definitions, parameters=model.parameters or {}) + @staticmethod + def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: + if not value_type: + return None + names_to_types = { + ValueType.string: str, + ValueType.number: float, + ValueType.integer: int, + ValueType.boolean: bool, + } + return names_to_types[value_type] + @staticmethod def create_api_key_authenticator( model: ApiKeyAuthenticatorModel, config: Config, token_provider: Optional[TokenProvider] = None, **kwargs: Any diff 
--git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index fefe885bbc05..3ebe49a059b5 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -4,9 +4,10 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any, List, Mapping, Optional +from typing import Any, List, Optional import requests +from airbyte_cdk.sources.declarative.types import Record @dataclass @@ -23,7 +24,7 @@ def initial_token(self) -> Optional[Any]: """ @abstractmethod - def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Any]: + def next_page_token(self, response: requests.Response, last_records: List[Record]) -> Optional[Any]: """ :param response: response to process :param last_records: records extracted from the response @@ -32,7 +33,7 @@ def next_page_token(self, response: requests.Response, last_records: List[Mappin pass @abstractmethod - def reset(self): + def reset(self) -> None: """ Reset the pagination's inner state """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py index 827171bcf705..8732e39ffef5 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py @@ -42,8 +42,12 @@ def next_page_token(self, response: requests.Response, last_records: List[Record return None return self._delegate.next_page_token(response, last_records) - def reset(self): + def reset(self) -> None: self._delegate.reset() def get_page_size(self) -> Optional[int]: return self._delegate.get_page_size() + + @property + def initial_token(self) -> Optional[Any]: + return self._delegate.initial_token diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py index 7802e4edbc85..109f4fb8ca70 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py @@ -3,7 +3,7 @@ # from dataclasses import InitVar, dataclass, field -from typing import Any, List, Mapping, Optional, Union +from typing import Any, List, Mapping, Optional, Type, Union import dpath.util from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString @@ -17,6 +17,7 @@ class AddedFieldDefinition: path: FieldPointer value: Union[InterpolatedString, str] + value_type: Optional[Type[Any]] parameters: InitVar[Mapping[str, Any]] @@ -26,6 +27,7 @@ class ParsedAddFieldDefinition: path: FieldPointer value: InterpolatedString + value_type: Optional[Type[Any]] parameters: InitVar[Mapping[str, Any]] @@ -85,10 +87,10 @@ class AddFields(RecordTransformation): parameters: InitVar[Mapping[str, Any]] _parsed_fields: List[ParsedAddFieldDefinition] = field(init=False, repr=False, default_factory=list) - def __post_init__(self, parameters: Mapping[str, Any]): + def 
__post_init__(self, parameters: Mapping[str, Any]) -> None: for add_field in self.fields: if len(add_field.path) < 1: - raise f"Expected a non-zero-length path for the AddFields transformation {add_field}" + raise ValueError(f"Expected a non-zero-length path for the AddFields transformation {add_field}") if not isinstance(add_field.value, InterpolatedString): if not isinstance(add_field.value, str): @@ -96,11 +98,16 @@ def __post_init__(self, parameters: Mapping[str, Any]): else: self._parsed_fields.append( ParsedAddFieldDefinition( - add_field.path, InterpolatedString.create(add_field.value, parameters=parameters), parameters=parameters + add_field.path, + InterpolatedString.create(add_field.value, parameters=parameters), + value_type=add_field.value_type, + parameters=parameters, ) ) else: - self._parsed_fields.append(ParsedAddFieldDefinition(add_field.path, add_field.value, parameters={})) + self._parsed_fields.append( + ParsedAddFieldDefinition(add_field.path, add_field.value, value_type=add_field.value_type, parameters={}) + ) def transform( self, @@ -109,12 +116,15 @@ def transform( stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, ) -> Record: + if config is None: + config = {} kwargs = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice} for parsed_field in self._parsed_fields: - value = parsed_field.value.eval(config, **kwargs) + valid_types = (parsed_field.value_type,) if parsed_field.value_type else None + value = parsed_field.value.eval(config, valid_types=valid_types, **kwargs) dpath.util.new(record, parsed_field.path, value) return record - def __eq__(self, other): - return self.__dict__ == other.__dict__ + def __eq__(self, other: Any) -> bool: + return bool(self.__dict__ == other.__dict__) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py index 560bf39e1b08..dd91864a2537 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from typing import Any, Mapping, Optional -from airbyte_cdk.sources.declarative.types import Config, StreamSlice, StreamState +from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState @dataclass @@ -18,7 +18,7 @@ class RecordTransformation: @abstractmethod def transform( self, - record: Mapping[str, Any], + record: Record, config: Optional[Config] = None, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index 8019d8f875d3..2008a143942c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -4,7 +4,7 @@ import logging import traceback -from typing import TYPE_CHECKING, List, Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple from airbyte_cdk.sources import Source from airbyte_cdk.sources.file_based.availability_strategy import 
AbstractFileBasedAvailabilityStrategy @@ -55,28 +55,32 @@ def check_availability_and_parsability( """ parser = stream.get_parser() try: - files = self._check_list_files(stream) + file = self._check_list_files(stream) if not parser.parser_max_n_files_for_parsability == 0: - self._check_parse_record(stream, files[0], logger) + self._check_parse_record(stream, file, logger) else: # If the parser is set to not check parsability, we still want to check that we can open the file. - handle = stream.stream_reader.open_file(files[0], parser.file_read_mode, None, logger) + handle = stream.stream_reader.open_file(file, parser.file_read_mode, None, logger) handle.close() except CheckAvailabilityError: return False, "".join(traceback.format_exc()) return True, None - def _check_list_files(self, stream: "AbstractFileBasedStream") -> List[RemoteFile]: + def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile: + """ + Check that we can list files from the stream. + + Returns the first file if successful, otherwise raises a CheckAvailabilityError. + """ try: - files = stream.list_files() + file = next(iter(stream.get_files())) + except StopIteration: + raise CheckAvailabilityError(FileBasedSourceError.EMPTY_STREAM, stream=stream.name) except Exception as exc: raise CheckAvailabilityError(FileBasedSourceError.ERROR_LISTING_FILES, stream=stream.name) from exc - if not files: - raise CheckAvailabilityError(FileBasedSourceError.EMPTY_STREAM, stream=stream.name) - - return files + return file def _check_parse_record(self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger) -> None: parser = stream.get_parser() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py index 73c4216c37c6..474a271a48d7 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py @@ -3,7 +3,7 @@ # from abc import abstractmethod -from functools import cached_property, lru_cache +from functools import cache, cached_property, lru_cache from typing import Any, Dict, Iterable, List, Mapping, Optional, Type from airbyte_cdk.models import SyncMode @@ -59,10 +59,21 @@ def primary_key(self) -> PrimaryKeyType: ... - @abstractmethod + @cache def list_files(self) -> List[RemoteFile]: """ List all files that belong to the stream. + + The output of this method is cached so we don't need to list the files more than once. + This means we won't pick up changes to the files during a sync. This method uses the + get_files method which is implemented by the concrete stream class. + """ + return list(self.get_files()) + + @abstractmethod + def get_files(self) -> Iterable[RemoteFile]: + """ + List all files that belong to the stream as defined by the stream's globs. """ ... 
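The change above splits file discovery in two: concrete streams now implement a lazy `get_files()`, while the base class's `list_files()` materializes and caches the listing. That is what lets the availability check peek at a single file via `next(iter(stream.get_files()))` instead of listing everything. A toy, self-contained sketch of the same laziness/caching split (hypothetical classes, not the CDK's own):

```python
# Illustration only: a stand-in for the lazy get_files / cached list_files contract.
from functools import cache
from typing import Iterator, List


class SketchStream:
    def __init__(self, uris: List[str]) -> None:
        self._uris = uris
        self.listings = 0  # counts how often the backing store is enumerated

    def get_files(self) -> Iterator[str]:
        # Lazy: callers such as an availability check can stop after the first file.
        self.listings += 1
        yield from self._uris

    @cache
    def list_files(self) -> List[str]:
        # Cached: the full listing is computed at most once per stream instance.
        return list(self.get_files())


stream = SketchStream(["a.csv", "b.csv"])
first = next(iter(stream.get_files()))  # peek at one file without a full listing
stream.list_files()
stream.list_files()  # second call is served from the cache
assert first == "a.csv" and stream.listings == 2
```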
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py index bed922364650..89849d7fbad1 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py @@ -199,14 +199,11 @@ def _get_raw_json_schema(self) -> JsonSchema: return schema - @cache - def list_files(self) -> List[RemoteFile]: + def get_files(self) -> Iterable[RemoteFile]: """ - List all files that belong to the stream as defined by the stream's globs. - The output of this method is cached so we don't need to list the files more than once. - This means we won't pick up changes to the files during a sync. + Return all files that belong to the stream as defined by the stream's globs. """ - return list(self.stream_reader.get_matching_files(self.config.globs or [], self.config.legacy_prefix, self.logger)) + return self.stream_reader.get_matching_files(self.config.globs or [], self.config.legacy_prefix, self.logger) def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]: loop = asyncio.get_event_loop() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py index b4c377e2c12c..76a8349f63de 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py @@ -27,11 +27,16 @@ def generate_partitions(self, partition_generator: PartitionGenerator, sync_mode Generate partitions from a partition generator and put them in a queue. When all the partitions are added to the queue, a sentinel is added to the queue to indicate that all the partitions have been generated. + If an exception is encountered, the exception will be caught and put in the queue. + This method is meant to be called in a separate thread. :param partition_generator: The partition Generator :param sync_mode: The sync mode used :return: """ - for partition in partition_generator.generate(sync_mode=sync_mode): - self._queue.put(partition) - self._queue.put(self._sentinel) + try: + for partition in partition_generator.generate(sync_mode=sync_mode): + self._queue.put(partition) + self._queue.put(self._sentinel) + except Exception as e: + self._queue.put(e) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_reader.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_reader.py index ce13b48dc56b..0bc9c35117a6 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_reader.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partition_reader.py @@ -10,7 +10,7 @@ class PartitionReader: """ - Generates records from a partition and puts them in a queuea. + Generates records from a partition and puts them in a queue. """ def __init__(self, queue: Queue[QueueItem]) -> None: @@ -24,10 +24,15 @@ def process_partition(self, partition: Partition) -> None: Process a partition and put the records in the output queue. When all the partitions are added to the queue, a sentinel is added to the queue to indicate that all the partitions have been generated. + If an exception is encountered, the exception will be caught and put in the queue. + This method is meant to be called from a thread. 
:param partition: The partition to read data from :return: None """ - for record in partition.read(): - self._queue.put(record) - self._queue.put(PartitionCompleteSentinel(partition)) + try: + for record in partition.read(): + self._queue.put(record) + self._queue.put(PartitionCompleteSentinel(partition)) + except Exception as e: + self._queue.put(e) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partitions/types.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partitions/types.py index d705555c857f..05d0ba4567a6 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partitions/types.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/partitions/types.py @@ -26,4 +26,4 @@ def __init__(self, partition: Partition): """ Typedef representing the items that can be added to the ThreadBasedConcurrentStream """ -QueueItem = Union[Record, Partition, PartitionCompleteSentinel, PARTITIONS_GENERATED_SENTINEL, Partition] +QueueItem = Union[Record, Partition, PartitionCompleteSentinel, PARTITIONS_GENERATED_SENTINEL, Partition, Exception] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py index 4e5fa36ae5c8..8a9067f95399 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/concurrent/thread_based_concurrent_stream.py @@ -69,15 +69,16 @@ def read(self) -> Iterable[Record]: Algorithm: 1. Submit a future to generate the stream's partition to process. - This has to be done asynchronously because we sometimes need to submit requests to the API to generate all partitions (eg for substreams). - - The future will add the partitions to process on a work queue + - The future will add the partitions to process on a work queue. 2. Continuously poll work from the work queue until all partitions are generated and processed + - If the next work item is an Exception, stop the threadpool and raise it. - If the next work item is a partition, submit a future to process it. - - The future will add the records to emit on the work queue - - Add the partitions to the partitions_to_done dict so we know it needs to complete for the sync to succeed - - If the next work item is a record, yield the record - - If the next work item is PARTITIONS_GENERATED_SENTINEL, all the partitions were generated - - If the next work item is a PartitionCompleteSentinel, a partition is done processing - - Update the value in partitions_to_done to True so we know the partition is completed + - The future will add the records to emit on the work queue. + - Add the partitions to the partitions_to_done dict so we know it needs to complete for the sync to succeed. + - If the next work item is a record, yield the record. + - If the next work item is PARTITIONS_GENERATED_SENTINEL, all the partitions were generated. + - If the next work item is a PartitionCompleteSentinel, a partition is done processing. + - Update the value in partitions_to_done to True so we know the partition is completed. 
""" self._logger.debug(f"Processing stream slices for {self.name} (sync_mode: full_refresh)") futures: List[Future[Any]] = [] @@ -93,26 +94,32 @@ def read(self) -> Iterable[Record]: partitions_to_done: Dict[Partition, bool] = {} finished_partitions = False - while record_or_partition := queue.get(block=True, timeout=self._timeout_seconds): - if record_or_partition == PARTITIONS_GENERATED_SENTINEL: + while record_or_partition_or_exception := queue.get(block=True, timeout=self._timeout_seconds): + if isinstance(record_or_partition_or_exception, Exception): + # An exception was raised while processing the stream + # Stop the threadpool and raise it + self._stop_and_raise_exception(record_or_partition_or_exception) + elif record_or_partition_or_exception == PARTITIONS_GENERATED_SENTINEL: # All partitions were generated finished_partitions = True - elif isinstance(record_or_partition, PartitionCompleteSentinel): + elif isinstance(record_or_partition_or_exception, PartitionCompleteSentinel): # All records for a partition were generated - if record_or_partition.partition not in partitions_to_done: + if record_or_partition_or_exception.partition not in partitions_to_done: raise RuntimeError( - f"Received sentinel for partition {record_or_partition.partition} that was not in partitions. This is indicative of a bug in the CDK. Please contact support.partitions:\n{partitions_to_done}" + f"Received sentinel for partition {record_or_partition_or_exception.partition} that was not in partitions. This is indicative of a bug in the CDK. Please contact support.partitions:\n{partitions_to_done}" ) - partitions_to_done[record_or_partition.partition] = True - elif isinstance(record_or_partition, Record): + partitions_to_done[record_or_partition_or_exception.partition] = True + elif isinstance(record_or_partition_or_exception, Record): # Emit records - yield record_or_partition - elif isinstance(record_or_partition, Partition): + yield record_or_partition_or_exception + elif isinstance(record_or_partition_or_exception, Partition): # A new partition was generated and must be processed - partitions_to_done[record_or_partition] = False + partitions_to_done[record_or_partition_or_exception] = False if self._slice_logger.should_log_slice_message(self._logger): - self._message_repository.emit_message(self._slice_logger.create_slice_log_message(record_or_partition.to_slice())) - self._submit_task(futures, partition_reader.process_partition, record_or_partition) + self._message_repository.emit_message( + self._slice_logger.create_slice_log_message(record_or_partition_or_exception.to_slice()) + ) + self._submit_task(futures, partition_reader.process_partition, record_or_partition_or_exception) if finished_partitions and all(partitions_to_done.values()): # All partitions were generated and process. 
We're done here break @@ -135,10 +142,17 @@ def _wait_while_too_many_pending_futures(self, futures: List[Future[Any]]) -> No def _check_for_errors(self, futures: List[Future[Any]]) -> None: exceptions_from_futures = [f for f in [future.exception() for future in futures] if f is not None] if exceptions_from_futures: - raise RuntimeError(f"Failed reading from stream {self.name} with errors: {exceptions_from_futures}") - futures_not_done = [f for f in futures if not f.done()] - if futures_not_done: - raise RuntimeError(f"Failed reading from stream {self.name} with futures not done: {futures_not_done}") + exception = RuntimeError(f"Failed reading from stream {self.name} with errors: {exceptions_from_futures}") + self._stop_and_raise_exception(exception) + else: + futures_not_done = [f for f in futures if not f.done()] + if futures_not_done: + exception = RuntimeError(f"Failed reading from stream {self.name} with futures not done: {futures_not_done}") + self._stop_and_raise_exception(exception) + + def _stop_and_raise_exception(self, exception: BaseException) -> None: + self._threadpool.shutdown(wait=False, cancel_futures=True) + raise exception @property def name(self) -> str: diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index d8cbf2f60e60..eb676375dc59 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -26,7 +26,7 @@ name="airbyte-cdk", # The version of the airbyte-cdk package is used at runtime to validate manifests. That validation must be # updated if our semver format changes such as using release candidate versions. - version="0.51.41", + version="0.51.44", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_jinja.py b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_jinja.py index cb312d18977e..097afbb3487f 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_jinja.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_jinja.py @@ -19,6 +19,20 @@ def test_get_value_from_config(): assert val == "2022-01-01" +@pytest.mark.parametrize( + "valid_types, expected_value", + [ + pytest.param((str,), "1234J", id="test_value_is_a_string_if_valid_types_is_str"), + pytest.param(None, 1234j, id="test_value_is_interpreted_as_complex_number_by_default"), + ], +) +def test_get_value_with_complex_number(valid_types, expected_value): + s = "{{ config['value'] }}" + config = {"value": "1234J"} + val = interpolation.eval(s, config, valid_types=valid_types) + assert val == expected_value + + def test_get_value_from_stream_slice(): s = "{{ stream_slice['date'] }}" config = {"date": "2022-01-01"} diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index a1c9eec8cd0e..602ab3d50424 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -1362,7 +1362,7 @@ def test_remove_fields(self): expected = [RemoveFields(field_pointers=[["path", "to", "field1"], ["path2"]], parameters={})] assert stream.retriever.record_selector.transformations == expected - def test_add_fields(self): + def test_add_fields_no_value_type(self): content = f""" 
the_stream: type: DeclarativeStream @@ -1374,26 +1374,142 @@ def test_add_fields(self): - path: ["field1"] value: "static_value" """ - parsed_manifest = YamlDeclarativeSource._parse(content) - resolved_manifest = resolver.preprocess_manifest(parsed_manifest) - resolved_manifest["type"] = "DeclarativeSource" - stream_manifest = transformer.propagate_types_and_parameters("", resolved_manifest["the_stream"], {}) - - stream = factory.create_component(model_type=DeclarativeStreamModel, component_definition=stream_manifest, config=input_config) + expected = [ + AddFields( + fields=[ + AddedFieldDefinition( + path=["field1"], + value=InterpolatedString(string="static_value", default="static_value", parameters={}), + value_type=None, + parameters={}, + ) + ], + parameters={}, + ) + ] + self._test_add_fields(content, expected) - assert isinstance(stream, DeclarativeStream) + def test_add_fields_value_type_is_string(self): + content = f""" + the_stream: + type: DeclarativeStream + $parameters: + {self.base_parameters} + transformations: + - type: AddFields + fields: + - path: ["field1"] + value: "static_value" + value_type: string + """ expected = [ AddFields( fields=[ AddedFieldDefinition( path=["field1"], value=InterpolatedString(string="static_value", default="static_value", parameters={}), + value_type=str, + parameters={}, + ) + ], + parameters={}, + ) + ] + self._test_add_fields(content, expected) + + def test_add_fields_value_type_is_number(self): + content = f""" + the_stream: + type: DeclarativeStream + $parameters: + {self.base_parameters} + transformations: + - type: AddFields + fields: + - path: ["field1"] + value: "1" + value_type: number + """ + expected = [ + AddFields( + fields=[ + AddedFieldDefinition( + path=["field1"], + value=InterpolatedString(string="1", default="1", parameters={}), + value_type=float, parameters={}, ) ], parameters={}, ) ] + self._test_add_fields(content, expected) + + def test_add_fields_value_type_is_integer(self): + content = f""" + the_stream: + type: DeclarativeStream + $parameters: + {self.base_parameters} + transformations: + - type: AddFields + fields: + - path: ["field1"] + value: "1" + value_type: integer + """ + expected = [ + AddFields( + fields=[ + AddedFieldDefinition( + path=["field1"], + value=InterpolatedString(string="1", default="1", parameters={}), + value_type=int, + parameters={}, + ) + ], + parameters={}, + ) + ] + self._test_add_fields(content, expected) + + def test_add_fields_value_type_is_boolean(self): + content = f""" + the_stream: + type: DeclarativeStream + $parameters: + {self.base_parameters} + transformations: + - type: AddFields + fields: + - path: ["field1"] + value: False + value_type: boolean + """ + expected = [ + AddFields( + fields=[ + AddedFieldDefinition( + path=["field1"], + value=InterpolatedString(string="False", default="False", parameters={}), + value_type=bool, + parameters={}, + ) + ], + parameters={}, + ) + ] + self._test_add_fields(content, expected) + + def _test_add_fields(self, content, expected): + parsed_manifest = YamlDeclarativeSource._parse(content) + resolved_manifest = resolver.preprocess_manifest(parsed_manifest) + resolved_manifest["type"] = "DeclarativeSource" + stream_manifest = transformer.propagate_types_and_parameters("", resolved_manifest["the_stream"], {}) + + stream = factory.create_component(model_type=DeclarativeStreamModel, component_definition=stream_manifest, config=input_config) + + assert isinstance(stream, DeclarativeStream) assert stream.retriever.record_selector.transformations 
== expected def test_default_schema_loader(self): diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py index 1386f2847652..a10422fd71bf 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py @@ -2,7 +2,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from typing import Any, List, Mapping, Tuple +from typing import Any, List, Mapping, Optional, Tuple import pytest from airbyte_cdk.sources.declarative.transformations import AddFields @@ -11,12 +11,22 @@ @pytest.mark.parametrize( - ["input_record", "field", "kwargs", "expected"], + ["input_record", "field", "field_type", "kwargs", "expected"], [ - pytest.param({"k": "v"}, [(["path"], "static_value")], {}, {"k": "v", "path": "static_value"}, id="add new static value"), + pytest.param({"k": "v"}, [(["path"], "static_value")], None, {}, {"k": "v", "path": "static_value"}, id="add new static value"), + pytest.param({"k": "v"}, [(["path"], "{{ 1 }}")], None, {}, {"k": "v", "path": 1}, id="add an expression evaluated as a number"), + pytest.param( + {"k": "v"}, + [(["path"], "{{ 1 }}")], + str, + {}, + {"k": "v", "path": "1"}, + id="add an expression evaluated as a string using the value_type field", + ), pytest.param( {"k": "v"}, [(["path"], "static_value"), (["path2"], "static_value2")], + None, {}, {"k": "v", "path": "static_value", "path2": "static_value2"}, id="add new multiple static values", @@ -24,15 +34,17 @@ pytest.param( {"k": "v"}, [(["nested", "path"], "static_value")], + None, {}, {"k": "v", "nested": {"path": "static_value"}}, id="set static value at nested path", ), - pytest.param({"k": "v"}, [(["k"], "new_value")], {}, {"k": "new_value"}, id="update value which already exists"), - pytest.param({"k": [0, 1]}, [(["k", 3], "v")], {}, {"k": [0, 1, None, "v"]}, id="Set element inside array"), + pytest.param({"k": "v"}, [(["k"], "new_value")], None, {}, {"k": "new_value"}, id="update value which already exists"), + pytest.param({"k": [0, 1]}, [(["k", 3], "v")], None, {}, {"k": [0, 1, None, "v"]}, id="Set element inside array"), pytest.param( {"k": "v"}, [(["k2"], '{{ config["shop"] }}')], + None, {"config": {"shop": "in-n-out"}}, {"k": "v", "k2": "in-n-out"}, id="set a value from the config using bracket notation", @@ -40,6 +52,7 @@ pytest.param( {"k": "v"}, [(["k2"], "{{ config.shop }}")], + None, {"config": {"shop": "in-n-out"}}, {"k": "v", "k2": "in-n-out"}, id="set a value from the config using dot notation", @@ -47,6 +60,7 @@ pytest.param( {"k": "v"}, [(["k2"], '{{ stream_state["cursor"] }}')], + None, {"stream_state": {"cursor": "t0"}}, {"k": "v", "k2": "t0"}, id="set a value from the state using bracket notation", @@ -54,6 +68,7 @@ pytest.param( {"k": "v"}, [(["k2"], "{{ stream_state.cursor }}")], + None, {"stream_state": {"cursor": "t0"}}, {"k": "v", "k2": "t0"}, id="set a value from the state using dot notation", @@ -61,6 +76,7 @@ pytest.param( {"k": "v"}, [(["k2"], '{{ stream_slice["start_date"] }}')], + None, {"stream_slice": {"start_date": "oct1"}}, {"k": "v", "k2": "oct1"}, id="set a value from the stream slice using bracket notation", @@ -68,6 +84,7 @@ pytest.param( {"k": "v"}, [(["k2"], "{{ stream_slice.start_date }}")], + None, {"stream_slice": {"start_date": "oct1"}}, {"k": "v", "k2": "oct1"}, id="set a value from the stream slice using dot 
notation", @@ -75,6 +92,7 @@ pytest.param( {"k": "v"}, [(["k2"], "{{ record.k }}")], + None, {}, {"k": "v", "k2": "v"}, id="set a value from a field in the record using dot notation", @@ -82,6 +100,7 @@ pytest.param( {"k": "v"}, [(["k2"], '{{ record["k"] }}')], + None, {}, {"k": "v", "k2": "v"}, id="set a value from a field in the record using bracket notation", @@ -89,6 +108,7 @@ pytest.param( {"k": {"nested": "v"}}, [(["k2"], "{{ record.k.nested }}")], + None, {}, {"k": {"nested": "v"}, "k2": "v"}, id="set a value from a nested field in the record using bracket notation", @@ -96,15 +116,20 @@ pytest.param( {"k": {"nested": "v"}}, [(["k2"], '{{ record["k"]["nested"] }}')], + None, {}, {"k": {"nested": "v"}, "k2": "v"}, id="set a value from a nested field in the record using bracket notation", ), - pytest.param({"k": "v"}, [(["k2"], "{{ 2 + 2 }}")], {}, {"k": "v", "k2": 4}, id="set a value from a jinja expression"), + pytest.param({"k": "v"}, [(["k2"], "{{ 2 + 2 }}")], None, {}, {"k": "v", "k2": 4}, id="set a value from a jinja expression"), ], ) def test_add_fields( - input_record: Mapping[str, Any], field: List[Tuple[FieldPointer, str]], kwargs: Mapping[str, Any], expected: Mapping[str, Any] + input_record: Mapping[str, Any], + field: List[Tuple[FieldPointer, str]], + field_type: Optional[str], + kwargs: Mapping[str, Any], + expected: Mapping[str, Any], ): - inputs = [AddedFieldDefinition(path=v[0], value=v[1], parameters={}) for v in field] + inputs = [AddedFieldDefinition(path=v[0], value=v[1], value_type=field_type, parameters={}) for v in field] assert AddFields(fields=inputs, parameters={"alas": "i live"}).transform(input_record, **kwargs) == expected diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py b/airbyte-cdk/python/unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py index 7b94f6f4db29..6b849d6a89d0 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/availability_strategy/test_default_file_based_availability_strategy.py @@ -44,19 +44,30 @@ def test_given_file_extension_does_not_match_when_check_availability_and_parsabi example we've seen was for JSONL parser but the file extension was just `.json`. Note that there we more than one record extracted from this stream so it's not just that the file is one JSON object """ - self._stream.list_files.return_value = [_FILE_WITH_UNKNOWN_EXTENSION] + self._stream.get_files.return_value = [_FILE_WITH_UNKNOWN_EXTENSION] self._parser.parse_records.return_value = [{"a record": 1}] is_available, reason = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock()) assert is_available + def test_not_available_given_no_files(self) -> None: + """ + If no files are returned, then the stream is not available. 
+ """ + self._stream.get_files.return_value = [] + + is_available, reason = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock()) + + assert not is_available + assert "No files were identified in the stream" in reason + def test_parse_records_is_not_called_with_parser_max_n_files_for_parsability_set(self) -> None: """ If the stream parser sets parser_max_n_files_for_parsability to 0, then we should not call parse_records on it """ self._parser.parser_max_n_files_for_parsability = 0 - self._stream.list_files.return_value = [_FILE_WITH_UNKNOWN_EXTENSION] + self._stream.get_files.return_value = [_FILE_WITH_UNKNOWN_EXTENSION] is_available, reason = self._strategy.check_availability_and_parsability(self._stream, Mock(), Mock()) diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/scenario_builder.py b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/scenario_builder.py index a2c18565feec..c43db3a76dbd 100644 --- a/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/scenario_builder.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/scenarios/scenario_builder.py @@ -1,11 +1,10 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - from abc import ABC, abstractmethod from copy import deepcopy from dataclasses import dataclass, field -from typing import Any, Generic, List, Mapping, Optional, Tuple, Type, TypeVar +from typing import Any, Generic, List, Mapping, Optional, Set, Tuple, Type, TypeVar from airbyte_cdk.models import AirbyteAnalyticsTraceMessage, SyncMode from airbyte_cdk.sources import AbstractSource @@ -46,7 +45,10 @@ def __init__( expected_read_error: Tuple[Optional[Type[Exception]], Optional[str]], incremental_scenario_config: Optional[IncrementalScenarioConfig], expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]] = None, + log_levels: Optional[Set[str]] = None, ): + if log_levels is None: + log_levels = {"ERROR", "WARN", "WARNING"} self.name = name self.config = config self.source = source @@ -60,6 +62,7 @@ def __init__( self.expected_read_error = expected_read_error self.incremental_scenario_config = incremental_scenario_config self.expected_analytics = expected_analytics + self.log_levels = log_levels self.validate() def validate(self) -> None: @@ -112,6 +115,7 @@ def __init__(self) -> None: self._incremental_scenario_config: Optional[IncrementalScenarioConfig] = None self._expected_analytics: Optional[List[AirbyteAnalyticsTraceMessage]] = None self.source_builder: Optional[SourceBuilder[SourceType]] = None + self._log_levels = None def set_name(self, name: str) -> "TestScenarioBuilder[SourceType]": self._name = name @@ -157,6 +161,10 @@ def set_expected_read_error(self, error: Type[Exception], message: str) -> "Test self._expected_read_error = error, message return self + def set_log_levels(self, levels: Set[str]) -> "TestScenarioBuilder": + self._log_levels = levels + return self + def set_source_builder(self, source_builder: SourceBuilder[SourceType]) -> "TestScenarioBuilder[SourceType]": self.source_builder = source_builder return self @@ -188,6 +196,7 @@ def build(self) -> "TestScenario[SourceType]": self._expected_read_error, self._incremental_scenario_config, self._expected_analytics, + self._log_levels, ) def _configured_catalog(self, sync_mode: SyncMode) -> Optional[Mapping[str, Any]]: diff --git a/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py b/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py index 003c422030d4..9deb5802d788 100644 --- 
a/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py +++ b/airbyte-cdk/python/unit_tests/sources/file_based/test_scenarios.py @@ -75,7 +75,7 @@ def run_test_read_incremental( def _verify_read_output(output: Dict[str, Any], scenario: TestScenario[AbstractSource]) -> None: records, logs = output["records"], output["logs"] - logs = [log for log in logs if log.get("level") in ("ERROR", "WARN", "WARNING")] + logs = [log for log in logs if log.get("level") in scenario.log_levels] expected_records = scenario.expected_records assert len(records) == len(expected_records) for actual, expected in zip(records, expected_records): diff --git a/airbyte-integrations/connectors/source-klarna/unit_tests/__init__.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/__init__.py similarity index 100% rename from airbyte-integrations/connectors/source-klarna/unit_tests/__init__.py rename to airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/__init__.py diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py new file mode 100644 index 000000000000..67d099acf905 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py @@ -0,0 +1,62 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# +import logging +from typing import Any, List, Mapping, Optional, Tuple, Union + +from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade +from airbyte_protocol.models import ConfiguredAirbyteStream +from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder + + +class StreamFacadeSource(AbstractSource): + def __init__(self, streams: List[Stream], max_workers: int): + self._streams = streams + self._max_workers = max_workers + + def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: + return True, None + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + return [StreamFacade.create_from_stream(stream, self, stream.logger, self._max_workers) for stream in self._streams] + + @property + def message_repository(self) -> Union[None, MessageRepository]: + return InMemoryMessageRepository() + + def spec(self, logger: logging.Logger) -> ConnectorSpecification: + return ConnectorSpecification(connectionSpecification={}) + + def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=s.as_airbyte_stream(), + sync_mode=SyncMode.full_refresh, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + for s in self._streams + ] + ) + + +class StreamFacadeSourceBuilder(SourceBuilder[StreamFacadeSource]): + def __init__(self): + self._source = None + self._streams = [] + self._max_workers = 1 + + def set_streams(self, streams: List[Stream]) -> "StreamFacadeSourceBuilder": + self._streams = streams + return self + + def set_max_workers(self, max_workers: int): + self._max_workers = max_workers + return self + + def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> StreamFacadeSource: + 
return StreamFacadeSource(self._streams, self._max_workers) diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py new file mode 100644 index 000000000000..773bae090a98 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py @@ -0,0 +1,386 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# +from typing import Any, Iterable, List, Mapping, Optional, Union + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.core import StreamData +from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder +from unit_tests.sources.streams.concurrent.scenarios.stream_facade_builder import StreamFacadeSourceBuilder + + +class _MockStream(Stream): + def __init__( + self, + slice_key, + slice_values_to_records_or_exception: Mapping[Optional[str], List[Union[Mapping[str, Any], Exception]]], + name, + json_schema, + primary_key=None, + ): + self._slice_key = slice_key + self._slice_values_to_records = slice_values_to_records_or_exception + self._name = name + self._json_schema = json_schema + self._primary_key = primary_key + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: Optional[List[str]] = None, + stream_slice: Optional[Mapping[str, Any]] = None, + stream_state: Optional[Mapping[str, Any]] = None, + ) -> Iterable[StreamData]: + for record_or_exception in self._get_record_or_exception_iterable(stream_slice): + if isinstance(record_or_exception, Exception): + raise record_or_exception + else: + yield record_or_exception + + def _get_record_or_exception_iterable( + self, stream_slice: Optional[Mapping[str, Any]] = None + ) -> Iterable[Union[Mapping[str, Any], Exception]]: + if stream_slice is None: + return self._slice_values_to_records[None] + else: + return self._slice_values_to_records[stream_slice[self._slice_key]] + + @property + def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + return self._primary_key + + @property + def name(self) -> str: + return self._name + + def get_json_schema(self) -> Mapping[str, Any]: + return self._json_schema + + def stream_slices( + self, *, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None + ) -> Iterable[Optional[Mapping[str, Any]]]: + if self._slice_key: + for slice_value in self._slice_values_to_records.keys(): + yield {self._slice_key: slice_value} + else: + yield None + + +_stream1 = _MockStream( + None, + {None: [{"id": "1"}, {"id": "2"}]}, + "stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, +) + +_stream_raising_exception = _MockStream( + None, + {None: [{"id": "1"}, ValueError("test exception")]}, + "stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, +) + +_stream_with_primary_key = _MockStream( + None, + {None: [{"id": "1"}, {"id": "2"}]}, + "stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + primary_key="id", +) + +_stream2 = _MockStream( + None, + {None: [{"id": "A"}, {"id": "B"}]}, + "stream2", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, +) + +_stream_with_single_slice = _MockStream( + "slice_key", + 
{"s1": [{"id": "1"}, {"id": "2"}]}, + "stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, +) + +_stream_with_multiple_slices = _MockStream( + "slice_key", + { + "s1": [{"id": "1"}, {"id": "2"}], + "s2": [{"id": "3"}, {"id": "4"}], + }, + "stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, +) + +test_stream_facade_single_stream = ( + TestScenarioBuilder() + .set_name("test_stream_facade_single_stream") + .set_config({}) + .set_source_builder(StreamFacadeSourceBuilder().set_streams([_stream1])) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .set_expected_logs( + { + "read": [ + {"level": "INFO", "message": "Starting syncing StreamFacadeSource"}, + {"level": "INFO", "message": "Marking stream stream1 as STARTED"}, + {"level": "INFO", "message": "Syncing stream: stream1"}, + {"level": "INFO", "message": "Marking stream stream1 as RUNNING"}, + {"level": "INFO", "message": "Read 2 records from stream1 stream"}, + {"level": "INFO", "message": "Marking stream stream1 as STOPPED"}, + {"level": "INFO", "message": "Finished syncing stream1"}, + {"level": "INFO", "message": "StreamFacadeSource runtimes"}, + {"level": "INFO", "message": "Finished syncing StreamFacadeSource"}, + ] + } + ) + .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"}) + .build() +) + +test_stream_facade_raises_exception = ( + TestScenarioBuilder() + .set_name("test_stream_facade_raises_exception") + .set_config({}) + .set_source_builder(StreamFacadeSourceBuilder().set_streams([_stream_raising_exception])) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .set_expected_read_error(ValueError, "test exception") + .build() +) + +test_stream_facade_single_stream_with_primary_key = ( + TestScenarioBuilder() + .set_name("test_stream_facade_stream_with_primary_key") + .set_config({}) + .set_source_builder(StreamFacadeSourceBuilder().set_streams([_stream1])) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) + +test_stream_facade_multiple_streams = ( + TestScenarioBuilder() + .set_name("test_stream_facade_multiple_streams") + .set_config({}) + .set_source_builder(StreamFacadeSourceBuilder().set_streams([_stream1, _stream2])) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + {"data": {"id": "A"}, "stream": "stream2"}, + {"data": {"id": "B"}, "stream": "stream2"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", 
+ "supported_sync_modes": ["full_refresh"], + }, + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream2", + "supported_sync_modes": ["full_refresh"], + }, + ] + } + ) + .build() +) + +test_stream_facade_single_stream_with_single_slice = ( + TestScenarioBuilder() + .set_name("test_stream_facade_single_stream_with_single_slice") + .set_config({}) + .set_source_builder(StreamFacadeSourceBuilder().set_streams([_stream1])) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) + +test_stream_facade_single_stream_with_multiple_slices = ( + TestScenarioBuilder() + .set_name("test_stream_facade_single_stream_with_multiple_slice") + .set_config({}) + .set_source_builder(StreamFacadeSourceBuilder().set_streams([_stream_with_multiple_slices])) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + {"data": {"id": "3"}, "stream": "stream1"}, + {"data": {"id": "4"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) + +test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two = ( + TestScenarioBuilder() + .set_name("test_stream_facade_single_stream_with_multiple_slice_with_concurrency_level_two") + .set_config({}) + .set_source_builder(StreamFacadeSourceBuilder().set_streams([_stream_with_multiple_slices])) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + {"data": {"id": "3"}, "stream": "stream1"}, + {"data": {"id": "4"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py new file mode 100644 index 000000000000..781a73a3ccf6 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py @@ -0,0 +1,57 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +from pathlib import PosixPath + +import pytest +from _pytest.capture import CaptureFixture +from freezegun import freeze_time +from pytest import LogCaptureFixture +from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenario +from unit_tests.sources.file_based.test_scenarios import verify_discover, verify_read +from unit_tests.sources.streams.concurrent.scenarios.stream_facade_scenarios import ( + test_stream_facade_multiple_streams, + test_stream_facade_raises_exception, + test_stream_facade_single_stream, + test_stream_facade_single_stream_with_multiple_slices, + test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two, + test_stream_facade_single_stream_with_primary_key, + test_stream_facade_single_stream_with_single_slice, +) +from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_scenarios import ( + test_concurrent_cdk_multiple_streams, + test_concurrent_cdk_partition_raises_exception, + test_concurrent_cdk_single_stream, + test_concurrent_cdk_single_stream_multiple_partitions, + test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two, + test_concurrent_cdk_single_stream_with_primary_key, +) + +scenarios = [ + test_concurrent_cdk_single_stream, + test_concurrent_cdk_multiple_streams, + test_concurrent_cdk_single_stream_multiple_partitions, + test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two, + test_concurrent_cdk_single_stream_with_primary_key, + test_concurrent_cdk_partition_raises_exception, + # test streams built using the facade + test_stream_facade_single_stream, + test_stream_facade_multiple_streams, + test_stream_facade_single_stream_with_primary_key, + test_stream_facade_single_stream_with_single_slice, + test_stream_facade_single_stream_with_multiple_slices, + test_stream_facade_single_stream_with_multiple_slices_with_concurrency_level_two, + test_stream_facade_raises_exception, +] + + +@pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios]) +@freeze_time("2023-06-09T00:00:00Z") +def test_concurrent_read(capsys: CaptureFixture[str], caplog: LogCaptureFixture, tmp_path: PosixPath, scenario: TestScenario) -> None: + verify_read(capsys, caplog, tmp_path, scenario) + + +@pytest.mark.parametrize("scenario", scenarios, ids=[s.name for s in scenarios]) +def test_concurrent_discover(capsys: CaptureFixture[str], tmp_path: PosixPath, scenario: TestScenario) -> None: + verify_discover(capsys, tmp_path, scenario) diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py new file mode 100644 index 000000000000..d30561482594 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py @@ -0,0 +1,419 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import logging + +from airbyte_cdk.sources.message import InMemoryMessageRepository +from airbyte_cdk.sources.streams.concurrent.partitions.record import Record +from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream +from airbyte_cdk.sources.utils.slice_logger import AlwaysLogSliceLogger +from unit_tests.sources.file_based.scenarios.scenario_builder import TestScenarioBuilder +from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import ( + AlwaysAvailableAvailabilityStrategy, + ConcurrentSourceBuilder, + InMemoryPartition, + InMemoryPartitionGenerator, + NeverLogSliceLogger, +) + +_id_only_stream = ThreadBasedConcurrentStream( + partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]), + max_workers=1, + name="stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + availability_strategy=AlwaysAvailableAvailabilityStrategy(), + primary_key=[], + cursor_field=None, + slice_logger=NeverLogSliceLogger(), + logger=logging.getLogger("test_logger"), + message_repository=None, + timeout_seconds=300, +) + +_id_only_stream_with_slice_logger = ThreadBasedConcurrentStream( + partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]), + max_workers=1, + name="stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + availability_strategy=AlwaysAvailableAvailabilityStrategy(), + primary_key=[], + cursor_field=None, + slice_logger=AlwaysLogSliceLogger(), + logger=logging.getLogger("test_logger"), + message_repository=None, + timeout_seconds=300, +) + +_id_only_stream_with_primary_key = ThreadBasedConcurrentStream( + partition_generator=InMemoryPartitionGenerator([InMemoryPartition("partition1", None, [Record({"id": "1"}), Record({"id": "2"})])]), + max_workers=1, + name="stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + availability_strategy=AlwaysAvailableAvailabilityStrategy(), + primary_key=["id"], + cursor_field=None, + slice_logger=NeverLogSliceLogger(), + logger=logging.getLogger("test_logger"), + message_repository=None, + timeout_seconds=300, +) + +_id_only_stream_multiple_partitions = ThreadBasedConcurrentStream( + partition_generator=InMemoryPartitionGenerator( + [ + InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]), + InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]), + ] + ), + max_workers=1, + name="stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + availability_strategy=AlwaysAvailableAvailabilityStrategy(), + primary_key=[], + cursor_field=None, + slice_logger=NeverLogSliceLogger(), + logger=logging.getLogger("test_logger"), + message_repository=None, + timeout_seconds=300, +) + +_id_only_stream_multiple_partitions_concurrency_level_two = ThreadBasedConcurrentStream( + partition_generator=InMemoryPartitionGenerator( + [ + InMemoryPartition("partition1", {"p": "1"}, [Record({"id": "1"}), Record({"id": "2"})]), + InMemoryPartition("partition2", {"p": "2"}, [Record({"id": "3"}), Record({"id": "4"})]), + ] + ), + max_workers=2, + name="stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", 
"string"]}, + }, + }, + availability_strategy=AlwaysAvailableAvailabilityStrategy(), + primary_key=[], + cursor_field=None, + slice_logger=NeverLogSliceLogger(), + logger=logging.getLogger("test_logger"), + message_repository=None, + timeout_seconds=300, +) + +_stream_raising_exception = ThreadBasedConcurrentStream( + partition_generator=InMemoryPartitionGenerator( + [InMemoryPartition("partition1", None, [Record({"id": "1"}), ValueError("test exception")])] + ), + max_workers=1, + name="stream1", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + availability_strategy=AlwaysAvailableAvailabilityStrategy(), + primary_key=[], + cursor_field=None, + slice_logger=NeverLogSliceLogger(), + logger=logging.getLogger("test_logger"), + message_repository=None, + timeout_seconds=300, +) + +test_concurrent_cdk_single_stream = ( + TestScenarioBuilder() + .set_name("test_concurrent_cdk_single_stream") + .set_config({}) + .set_source_builder( + ConcurrentSourceBuilder() + .set_streams( + [ + _id_only_stream, + ] + ) + .set_message_repository(InMemoryMessageRepository()) + ) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + ] + ) + .set_expected_logs( + { + "read": [ + {"level": "INFO", "message": "Starting syncing ConcurrentCdkSource"}, + {"level": "INFO", "message": "Marking stream stream1 as STARTED"}, + {"level": "INFO", "message": "Syncing stream: stream1"}, + {"level": "INFO", "message": "Marking stream stream1 as RUNNING"}, + {"level": "INFO", "message": "Read 2 records from stream1 stream"}, + {"level": "INFO", "message": "Marking stream stream1 as STOPPED"}, + {"level": "INFO", "message": "Finished syncing stream1"}, + {"level": "INFO", "message": "ConcurrentCdkSource runtimes"}, + {"level": "INFO", "message": "Finished syncing ConcurrentCdkSource"}, + ] + } + ) + .set_log_levels({"ERROR", "WARN", "WARNING", "INFO", "DEBUG"}) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) + +test_concurrent_cdk_single_stream_with_primary_key = ( + TestScenarioBuilder() + .set_name("test_concurrent_cdk_single_stream_with_primary_key") + .set_config({}) + .set_source_builder( + ConcurrentSourceBuilder().set_streams( + [ + _id_only_stream_with_primary_key, + ] + ) + ) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]], + } + ] + } + ) + .build() +) + +test_concurrent_cdk_multiple_streams = ( + TestScenarioBuilder() + .set_name("test_concurrent_cdk_multiple_streams") + .set_config({}) + .set_source_builder( + ConcurrentSourceBuilder().set_streams( + [ + _id_only_stream, + ThreadBasedConcurrentStream( + partition_generator=InMemoryPartitionGenerator( + [InMemoryPartition("partition1", None, [Record({"id": "10", "key": "v1"}), Record({"id": "20", "key": "v2"})])] + ), + max_workers=1, + name="stream2", + json_schema={ + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + "key": {"type": ["null", "string"]}, + }, + }, + 
availability_strategy=AlwaysAvailableAvailabilityStrategy(), + primary_key=[], + cursor_field=None, + slice_logger=NeverLogSliceLogger(), + logger=logging.getLogger("test_logger"), + message_repository=None, + timeout_seconds=300, + ), + ] + ) + ) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + {"data": {"id": "10", "key": "v1"}, "stream": "stream2"}, + {"data": {"id": "20", "key": "v2"}, "stream": "stream2"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + }, + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + "key": {"type": ["null", "string"]}, + }, + }, + "name": "stream2", + "supported_sync_modes": ["full_refresh"], + }, + ] + } + ) + .build() +) + +test_concurrent_cdk_partition_raises_exception = ( + TestScenarioBuilder() + .set_name("test_concurrent_partition_raises_exception") + .set_config({}) + .set_source_builder( + ConcurrentSourceBuilder() + .set_streams( + [ + _stream_raising_exception, + ] + ) + .set_message_repository(InMemoryMessageRepository()) + ) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + ] + ) + .set_expected_read_error(ValueError, "test exception") + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) + +test_concurrent_cdk_single_stream_multiple_partitions = ( + TestScenarioBuilder() + .set_name("test_concurrent_cdk_single_stream_multiple_partitions") + .set_config({}) + .set_source_builder( + ConcurrentSourceBuilder().set_streams( + [ + _id_only_stream_multiple_partitions, + ] + ) + ) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + {"data": {"id": "3"}, "stream": "stream1"}, + {"data": {"id": "4"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) + +test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_two = ( + TestScenarioBuilder() + .set_name("test_concurrent_cdk_single_stream_multiple_partitions_concurrency_level_2") + .set_config({}) + .set_source_builder( + ConcurrentSourceBuilder().set_streams( + [ + _id_only_stream_multiple_partitions_concurrency_level_two, + ] + ) + ) + .set_expected_records( + [ + {"data": {"id": "1"}, "stream": "stream1"}, + {"data": {"id": "2"}, "stream": "stream1"}, + {"data": {"id": "3"}, "stream": "stream1"}, + {"data": {"id": "4"}, "stream": "stream1"}, + ] + ) + .set_expected_catalog( + { + "streams": [ + { + "json_schema": { + "type": "object", + "properties": { + "id": {"type": ["null", "string"]}, + }, + }, + "name": "stream1", + "supported_sync_modes": ["full_refresh"], + } + ] + } + ) + .build() +) diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py new file mode 100644 index 
000000000000..ff36800b7c3f --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py @@ -0,0 +1,116 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import json +import logging +from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union + +from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConnectorSpecification, DestinationSyncMode, SyncMode +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.message import MessageRepository +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade +from airbyte_cdk.sources.streams.concurrent.availability_strategy import AbstractAvailabilityStrategy, StreamAvailability, StreamAvailable +from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition +from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator +from airbyte_cdk.sources.streams.concurrent.partitions.record import Record +from airbyte_cdk.sources.streams.concurrent.thread_based_concurrent_stream import ThreadBasedConcurrentStream +from airbyte_cdk.sources.utils.slice_logger import SliceLogger +from airbyte_protocol.models import ConfiguredAirbyteStream +from unit_tests.sources.file_based.scenarios.scenario_builder import SourceBuilder + + +class ConcurrentCdkSource(AbstractSource): + def __init__(self, streams: List[ThreadBasedConcurrentStream], message_repository: Optional[MessageRepository]): + self._streams = streams + self._message_repository = message_repository + + def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: + # Check is not verified because it is up to the source to implement this method + return True, None + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + return [StreamFacade(s) for s in self._streams] + + def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification: + return ConnectorSpecification(connectionSpecification={}) + + def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog: + return ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=StreamFacade(s).as_airbyte_stream(), + sync_mode=SyncMode.full_refresh, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + for s in self._streams + ] + ) + + @property + def message_repository(self) -> Union[None, MessageRepository]: + return self._message_repository + + +class InMemoryPartitionGenerator(PartitionGenerator): + def __init__(self, partitions: List[Partition]): + self._partitions = partitions + + def generate(self, sync_mode: SyncMode) -> Iterable[Partition]: + yield from self._partitions + + +class InMemoryPartition(Partition): + def __init__(self, name, _slice, records): + self._name = name + self._slice = _slice + self._records = records + + def read(self) -> Iterable[Record]: + for record_or_exception in self._records: + if isinstance(record_or_exception, Exception): + raise record_or_exception + else: + yield record_or_exception + + def to_slice(self) -> Optional[Mapping[str, Any]]: + return self._slice + + def __hash__(self) -> int: + if self._slice: + # Convert the slice to a string so that it can be hashed + s = json.dumps(self._slice, sort_keys=True) + return hash((self._name, s)) + else: + return hash(self._name) + + +class ConcurrentSourceBuilder(SourceBuilder[ConcurrentCdkSource]): + def __init__(self): + self._streams: 
List[ThreadBasedConcurrentStream] = [] + self._message_repository = None + + def build(self, configured_catalog: Optional[Mapping[str, Any]]) -> ConcurrentCdkSource: + for stream in self._streams: + if not stream._message_repository: + stream._message_repository = self._message_repository + return ConcurrentCdkSource(self._streams, self._message_repository) + + def set_streams(self, streams: List[ThreadBasedConcurrentStream]) -> "ConcurrentSourceBuilder": + self._streams = streams + return self + + def set_message_repository(self, message_repository: MessageRepository) -> "ConcurrentSourceBuilder": + self._message_repository = message_repository + return self + + +class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy): + def check_availability(self, logger: logging.Logger) -> StreamAvailability: + return StreamAvailable() + + +class NeverLogSliceLogger(SliceLogger): + def should_log_slice_message(self, logger: logging.Logger) -> bool: + return False diff --git a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py index 49d1ad65cb5e..14aacffcb672 100644 --- a/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py +++ b/airbyte-cdk/python/unit_tests/sources/streams/concurrent/test_thread_based_concurrent_stream.py @@ -5,6 +5,7 @@ import unittest from unittest.mock import Mock, call +import pytest from airbyte_cdk.models import AirbyteStream, SyncMode from airbyte_cdk.sources.streams.concurrent.availability_strategy import STREAM_AVAILABLE from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition @@ -50,6 +51,15 @@ def test_check_availability(self): assert availability == STREAM_AVAILABLE self._availability_strategy.check_availability.assert_called_once_with(self._logger) + def test_check_for_error_raises_an_exception_if_any_of_the_futures_are_not_done(self): + futures = [Mock() for _ in range(3)] + for f in futures: + f.exception.return_value = None + futures[0].done.return_value = False + + with self.assertRaises(Exception): + self._stream._check_for_errors(futures) + def test_check_for_error_raises_no_exception_if_all_futures_succeeded(self): futures = [Mock() for _ in range(3)] for f in futures: @@ -66,14 +76,17 @@ def test_check_for_error_raises_an_exception_if_any_of_the_futures_raised_an_exc with self.assertRaises(Exception): self._stream._check_for_errors(futures) - def test_check_for_error_raises_an_exception_if_any_of_the_futures_are_not_done(self): - futures = [Mock() for _ in range(3)] - for f in futures: - f.exception.return_value = None - futures[0].done.return_value = False + def test_read_raises_an_exception_if_a_partition_raises_an_exception(self): + partition = Mock(spec=Partition) + partition.read.side_effect = RuntimeError("error") + self._partition_generator.generate.return_value = [partition] + with pytest.raises(RuntimeError): + list(self._stream.read()) - with self.assertRaises(Exception): - self._stream._check_for_errors(futures) + def test_read_raises_an_exception_if_partition_generator_raises_an_exception(self): + self._partition_generator.generate.side_effect = RuntimeError("error") + with pytest.raises(RuntimeError): + list(self._stream.read()) def test_read_no_slice_message(self): partition = Mock(spec=Partition) diff --git a/airbyte-ci/connectors/pipelines/README.md b/airbyte-ci/connectors/pipelines/README.md index d14af9d3f6cd..4eaf7a93224a 
100644 --- a/airbyte-ci/connectors/pipelines/README.md +++ b/airbyte-ci/connectors/pipelines/README.md @@ -133,18 +133,21 @@ Available commands: * `airbyte-ci connectors publish`: Publish a connector to Airbyte's DockerHub. #### Options -| Option | Multiple | Default value | Description | -| -------------------------------------------------------------- | -------- | -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `--use-remote-secrets` | False | True | If True, connectors configuration will be pulled from Google Secret Manager. Requires the GCP_GSM_CREDENTIALS environment variable to be set with a service account with permission to read GSM secrets. If False the connector configuration will be read from the local connector `secrets` folder. | -| `--name` | True | | Select a specific connector for which the pipeline will run. Can be used multiple time to select multiple connectors. The expected name is the connector technical name. e.g. `source-pokeapi` | -| `--support-level` | True | | Select connectors with a specific support level: `community`, `certified`. Can be used multiple times to select multiple support levels. | -| `--metadata-query` | False | | Filter connectors by the `data` field in the metadata file using a [simpleeval](https://github.com/danthedeckie/simpleeval) query. e.g. 'data.ab_internal.ql == 200' | -| `--use-local-cdk` | False | False | Build with the airbyte-cdk from the local repository. " "This is useful for testing changes to the CDK. | -| `--language` | True | | Select connectors with a specific language: `python`, `low-code`, `java`. Can be used multiple times to select multiple languages. | -| `--modified` | False | False | Run the pipeline on only the modified connectors on the branch or previous commit (depends on the pipeline implementation). | -| `--concurrency` | False | 5 | Control the number of connector pipelines that can run in parallel. Useful to speed up pipelines or control their resource usage. | -| `--metadata-change-only/--not-metadata-change-only` | False | `--not-metadata-change-only` | Only run the pipeline on connectors with changes on their metadata.yaml file. | -| `--enable-dependency-scanning / --disable-dependency-scanning` | False | ` --disable-dependency-scanning` | When enabled the dependency scanning will be performed to detect the connectors to select according to a dependency change. | +| Option | Multiple | Default value | Mapped Environment Variable | Description | +| -------------------------------------------------------------- | -------- | -------------------------------- | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `--use-remote-secrets` | False | True | | If True, connectors configuration will be pulled from Google Secret Manager. Requires the GCP_GSM_CREDENTIALS environment variable to be set with a service account with permission to read GSM secrets. If False the connector configuration will be read from the local connector `secrets` folder. 
| +| `--name` | True | | | Select a specific connector for which the pipeline will run. Can be used multiple times to select multiple connectors. The expected name is the connector technical name. e.g. `source-pokeapi` | +| `--support-level` | True | | | Select connectors with a specific support level: `community`, `certified`. Can be used multiple times to select multiple support levels. | +| `--metadata-query` | False | | | Filter connectors by the `data` field in the metadata file using a [simpleeval](https://github.com/danthedeckie/simpleeval) query. e.g. 'data.ab_internal.ql == 200' | +| `--use-local-cdk` | False | False | | Build with the airbyte-cdk from the local repository. " "This is useful for testing changes to the CDK. | +| `--language` | True | | | Select connectors with a specific language: `python`, `low-code`, `java`. Can be used multiple times to select multiple languages. | +| `--modified` | False | False | | Run the pipeline on only the modified connectors on the branch or previous commit (depends on the pipeline implementation). | +| `--concurrency` | False | 5 | | Control the number of connector pipelines that can run in parallel. Useful to speed up pipelines or control their resource usage. | +| `--metadata-change-only/--not-metadata-change-only` | False | `--not-metadata-change-only` | | Only run the pipeline on connectors with changes on their metadata.yaml file. | +| `--enable-dependency-scanning / --disable-dependency-scanning` | False | ` --disable-dependency-scanning` | | When enabled the dependency scanning will be performed to detect the connectors to select according to a dependency change. | +| `--docker-hub-username` | | | DOCKER_HUB_USERNAME | Your username to connect to DockerHub. Required for the publish subcommand. | +| `--docker-hub-password` | | | DOCKER_HUB_PASSWORD | Your password to connect to DockerHub. Required for the publish subcommand. | + ### `connectors list` command Retrieve the list of connectors satisfying the provided filters. @@ -293,8 +296,6 @@ Publish all connectors modified in the head commit: `airbyte-ci connectors --mod | Option | Required | Default | Mapped environment variable | Description | | ------------------------------------ | -------- | --------------- | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `--pre-release/--main-release` | False | `--pre-release` | | Whether to publish the pre-release or the main release version of a connector. Defaults to pre-release. For main release you have to set the credentials to interact with the GCS bucket. | -| `--docker-hub-username` | True | | `DOCKER_HUB_USERNAME` | Your username to connect to DockerHub. | -| `--docker-hub-password` | True | | `DOCKER_HUB_PASSWORD` | Your password to connect to DockerHub. | | `--spec-cache-gcs-credentials` | False | | `SPEC_CACHE_GCS_CREDENTIALS` | The service account key to upload files to the GCS bucket hosting spec cache. | | `--spec-cache-bucket-name` | False | | `SPEC_CACHE_BUCKET_NAME` | The name of the GCS bucket where specs will be cached. | | `--metadata-service-gcs-credentials` | False | | `METADATA_SERVICE_GCS_CREDENTIALS` | The service account key to upload files to the GCS bucket hosting the metadata files. | @@ -398,6 +399,9 @@ This command runs the Python tests for a airbyte-ci poetry package. 
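
Since the DockerHub credential options now live on the top-level `connectors` command group and are mapped to the `DOCKER_HUB_USERNAME` / `DOCKER_HUB_PASSWORD` environment variables, a manual publish run no longer passes them to the `publish` subcommand itself. For illustration only, an invocation could look like `DOCKER_HUB_USERNAME=<username> DOCKER_HUB_PASSWORD=<password> airbyte-ci connectors --name=source-pokeapi publish --main-release`; the connector name and release flag here are placeholders, not part of this change.
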
## Changelog | Version | PR | Description | | ------- | ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| 2.2.6 | [#31752](https://github.com/airbytehq/airbyte/pull/31752) | Only authenticate when secrets are available. +| 2.2.5 | [#31718](https://github.com/airbytehq/airbyte/pull/31718) | Authenticate the sidecar docker daemon to DockerHub. | +| 2.2.4 | [#31535](https://github.com/airbytehq/airbyte/pull/31535) | Improve gradle caching when building java connectors. | | 2.2.3 | [#31688](https://github.com/airbytehq/airbyte/pull/31688) | Fix failing `CheckBaseImageUse` step when not running on PR. | | 2.2.2 | [#31659](https://github.com/airbytehq/airbyte/pull/31659) | Support builds on x86_64 platform | | 2.2.1 | [#31653](https://github.com/airbytehq/airbyte/pull/31653) | Fix CheckBaseImageIsUsed failing on non certified connectors. | diff --git a/airbyte-ci/connectors/pipelines/airbyte-integrations/connectors/source-hubspot/metadata.yaml b/airbyte-ci/connectors/pipelines/airbyte-integrations/connectors/source-hubspot/metadata.yaml new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py index 0cdf469c9d9c..7c24e6766b2c 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/build_image/commands.py @@ -40,6 +40,8 @@ def build(ctx: click.Context, use_host_gradle_dist_tar: bool) -> bool: use_local_cdk=ctx.obj.get("use_local_cdk"), open_report_in_browser=ctx.obj.get("open_report_in_browser"), use_host_gradle_dist_tar=use_host_gradle_dist_tar, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py index 1da52905c82d..72d69ddba4cf 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/bump_version/commands.py @@ -41,6 +41,8 @@ def bump_version( ci_git_user=ctx.obj["ci_git_user"], ci_github_access_token=ctx.obj["ci_github_access_token"], open_report_in_browser=False, + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/commands.py index f985348db32a..be99c29c70af 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/commands.py @@ -177,6 +177,20 @@ def validate_environment(is_local: bool, use_remote_secrets: bool): default=True, type=bool, ) +@click.option( + "--docker-hub-username", + help="Your username to connect to DockerHub.", + type=click.STRING, + required=False, + 
envvar="DOCKER_HUB_USERNAME", +) +@click.option( + "--docker-hub-password", + help="Your password to connect to DockerHub.", + type=click.STRING, + required=False, + envvar="DOCKER_HUB_PASSWORD", +) @click.pass_context def connectors( ctx: click.Context, @@ -192,6 +206,8 @@ def connectors( enable_dependency_scanning: bool, use_local_cdk: bool, enable_report_auto_open: bool, + docker_hub_username: str, + docker_hub_password: str, ): """Group all the connectors-ci command.""" validate_environment(ctx.obj["is_local"], use_remote_secrets) @@ -202,6 +218,8 @@ def connectors( ctx.obj["execute_timeout"] = execute_timeout ctx.obj["use_local_cdk"] = use_local_cdk ctx.obj["open_report_in_browser"] = enable_report_auto_open + ctx.obj["docker_hub_username"] = docker_hub_username + ctx.obj["docker_hub_password"] = docker_hub_password ctx.obj["selected_connectors_with_modified_files"] = get_selected_connectors_with_modified_files( names, support_levels, diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py index 1c7dc0615509..0036781b8154 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/context.py @@ -11,7 +11,7 @@ import yaml from anyio import Path from asyncer import asyncify -from dagger import Directory +from dagger import Directory, Secret from github import PullRequest from pipelines.airbyte_ci.connectors.reports import ConnectorReport from pipelines.dagger.actions import secrets @@ -57,6 +57,8 @@ def __init__( open_report_in_browser: bool = True, docker_hub_username: Optional[str] = None, docker_hub_password: Optional[str] = None, + s3_build_cache_access_key_id: Optional[str] = None, + s3_build_cache_secret_key: Optional[str] = None, ): """Initialize a connector context. @@ -82,6 +84,8 @@ def __init__( open_report_in_browser (bool, optional): Open HTML report in browser window. Defaults to True. docker_hub_username (Optional[str], optional): Docker Hub username to use to read registries. Defaults to None. docker_hub_password (Optional[str], optional): Docker Hub password to use to read registries. Defaults to None. + s3_build_cache_access_key_id (Optional[str], optional): Gradle S3 Build Cache credentials. Defaults to None. + s3_build_cache_secret_key (Optional[str], optional): Gradle S3 Build Cache credentials. Defaults to None. 
""" self.pipeline_name = pipeline_name @@ -101,6 +105,8 @@ def __init__( self.open_report_in_browser = open_report_in_browser self.docker_hub_username = docker_hub_username self.docker_hub_password = docker_hub_password + self.s3_build_cache_access_key_id = s3_build_cache_access_key_id + self.s3_build_cache_secret_key = s3_build_cache_secret_key super().__init__( pipeline_name=pipeline_name, @@ -121,6 +127,18 @@ def __init__( open_report_in_browser=open_report_in_browser, ) + @property + def s3_build_cache_access_key_id_secret(self) -> Optional[Secret]: + if self.s3_build_cache_access_key_id: + return self.dagger_client.set_secret("s3_build_cache_access_key_id", self.s3_build_cache_access_key_id) + return None + + @property + def s3_build_cache_secret_key_secret(self) -> Optional[Secret]: + if self.s3_build_cache_access_key_id and self.s3_build_cache_secret_key: + return self.dagger_client.set_secret("s3_build_cache_secret_key", self.s3_build_cache_secret_key) + return None + @property def modified_files(self): return self.connector.modified_files @@ -173,6 +191,18 @@ def docker_image_tag(self) -> str: def docker_image(self) -> str: return f"{self.docker_repository}:{self.docker_image_tag}" + @property + def docker_hub_username_secret(self) -> Optional[Secret]: + if self.docker_hub_username is None: + return None + return self.dagger_client.set_secret("docker_hub_username", self.docker_hub_username) + + @property + def docker_hub_password_secret(self) -> Optional[Secret]: + if self.docker_hub_password is None: + return None + return self.dagger_client.set_secret("docker_hub_password", self.docker_hub_password) + async def get_connector_dir(self, exclude=None, include=None) -> Directory: """Get the connector under test source code directory. diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py index b57afc0e0005..c238a301fc69 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/migrate_to_base_image/commands.py @@ -8,6 +8,7 @@ from pipelines.airbyte_ci.connectors.migrate_to_base_image.pipeline import run_connector_migration_to_base_image_pipeline from pipelines.airbyte_ci.connectors.pipeline import run_connectors_pipelines from pipelines.cli.dagger_pipeline_command import DaggerPipelineCommand +from pipelines.helpers.utils import fail_if_missing_docker_hub_creds @click.command( @@ -15,29 +16,15 @@ short_help="Make the selected connectors use our base image: remove dockerfile, update metadata.yaml and update documentation.", ) @click.argument("pull-request-number", type=str) -@click.option( - "--docker-hub-username", - help="Your username to connect to DockerHub to read the registries.", - type=click.STRING, - required=True, - envvar="DOCKER_HUB_USERNAME", -) -@click.option( - "--docker-hub-password", - help="Your password to connect to DockerHub to read the registries.", - type=click.STRING, - required=True, - envvar="DOCKER_HUB_PASSWORD", -) @click.pass_context def migrate_to_base_image( ctx: click.Context, pull_request_number: str, - docker_hub_username: str, - docker_hub_password: str, ) -> bool: """Bump a connector version: update metadata.yaml, changelog and delete legacy files.""" + fail_if_missing_docker_hub_creds(ctx) + connectors_contexts = [ ConnectorContext( pipeline_name=f"Upgrade base image 
versions of connector {connector.technical_name}", @@ -56,8 +43,10 @@ def migrate_to_base_image( ci_git_user=ctx.obj["ci_git_user"], ci_github_access_token=ctx.obj["ci_github_access_token"], open_report_in_browser=False, - docker_hub_username=docker_hub_username, - docker_hub_password=docker_hub_password, + docker_hub_username=ctx.obj.get("docker_hub_username"), + docker_hub_password=ctx.obj.get("docker_hub_password"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/pipeline.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/pipeline.py index 538ab673b92b..f56824d31dbc 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/pipeline.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/pipeline.py @@ -90,7 +90,17 @@ async def run_connectors_pipelines( # HACK: This is to get a long running dockerd service to be shared across all the connectors pipelines # Using the "normal" service binding leads to restart of dockerd during pipeline run that can cause corrupted docker state # See https://github.com/airbytehq/airbyte/issues/27233 - dockerd_service = docker.with_global_dockerd_service(dagger_client) + + docker_hub_username = contexts[0].docker_hub_username + docker_hub_password = contexts[0].docker_hub_password + + if docker_hub_username and docker_hub_password: + docker_hub_username_secret = dagger_client.set_secret("DOCKER_HUB_USERNAME", docker_hub_username) + docker_hub_password_secret = dagger_client.set_secret("DOCKER_HUB_PASSWORD", docker_hub_password) + dockerd_service = docker.with_global_dockerd_service(dagger_client, docker_hub_username_secret, docker_hub_password_secret) + else: + dockerd_service = docker.with_global_dockerd_service(dagger_client) + async with anyio.create_task_group() as tg_main: tg_main.start_soon(dockerd_service.sync) await ( # Wait for the docker service to be ready diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py index a13e4507eeab..fd5bc65ebba9 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/commands.py @@ -10,6 +10,7 @@ from pipelines.airbyte_ci.connectors.publish.pipeline import reorder_contexts, run_connector_publish_pipeline from pipelines.cli.dagger_pipeline_command import DaggerPipelineCommand from pipelines.consts import ContextState +from pipelines.helpers.utils import fail_if_missing_docker_hub_creds @click.command(cls=DaggerPipelineCommand, help="Publish all images for the selected connectors.") @@ -42,20 +43,6 @@ required=True, envvar="METADATA_SERVICE_BUCKET_NAME", ) -@click.option( - "--docker-hub-username", - help="Your username to connect to DockerHub.", - type=click.STRING, - required=True, - envvar="DOCKER_HUB_USERNAME", -) -@click.option( - "--docker-hub-password", - help="Your password to connect to DockerHub.", - type=click.STRING, - required=True, - envvar="DOCKER_HUB_PASSWORD", -) @click.option( "--slack-webhook", help="The Slack webhook URL to send notifications to.", @@ -77,8 +64,6 @@ def publish( spec_cache_bucket_name: str, metadata_service_bucket_name: str, 
metadata_service_gcs_credentials: str, - docker_hub_username: str, - docker_hub_password: str, slack_webhook: str, slack_channel: str, ): @@ -92,6 +77,8 @@ def publish( abort=True, ) + fail_if_missing_docker_hub_creds(ctx) + publish_connector_contexts = reorder_contexts( [ PublishConnectorContext( @@ -101,8 +88,8 @@ def publish( spec_cache_bucket_name=spec_cache_bucket_name, metadata_service_gcs_credentials=metadata_service_gcs_credentials, metadata_bucket_name=metadata_service_bucket_name, - docker_hub_username=docker_hub_username, - docker_hub_password=docker_hub_password, + docker_hub_username=ctx.obj["docker_hub_username"], + docker_hub_password=ctx.obj["docker_hub_password"], slack_webhook=slack_webhook, reporting_slack_channel=slack_channel, ci_report_bucket=ctx.obj["ci_report_bucket_name"], @@ -116,6 +103,8 @@ def publish( ci_context=ctx.obj.get("ci_context"), ci_gcs_credentials=ctx.obj["ci_gcs_credentials"], pull_request=ctx.obj.get("pull_request"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py index 8c15ff3cb32c..064f885643c0 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/publish/context.py @@ -39,6 +39,8 @@ def __init__( ci_context: Optional[str] = None, ci_gcs_credentials: str = None, pull_request: PullRequest = None, + s3_build_cache_access_key_id: Optional[str] = None, + s3_build_cache_secret_key: Optional[str] = None, ): self.pre_release = pre_release self.spec_cache_bucket_name = spec_cache_bucket_name @@ -66,16 +68,10 @@ def __init__( should_save_report=True, docker_hub_username=docker_hub_username, docker_hub_password=docker_hub_password, + s3_build_cache_access_key_id=s3_build_cache_access_key_id, + s3_build_cache_secret_key=s3_build_cache_secret_key, ) - @property - def docker_hub_username_secret(self) -> Secret: - return self.dagger_client.set_secret("docker_hub_username", self.docker_hub_username) - - @property - def docker_hub_password_secret(self) -> Secret: - return self.dagger_client.set_secret("docker_hub_password", self.docker_hub_password) - @property def metadata_service_gcs_credentials_secret(self) -> Secret: return self.dagger_client.set_secret("metadata_service_gcs_credentials", self.metadata_service_gcs_credentials) diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py index 8ed006d81143..a461b9d0bf04 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/commands.py @@ -13,6 +13,7 @@ from pipelines.cli.dagger_pipeline_command import DaggerPipelineCommand from pipelines.consts import ContextState from pipelines.helpers.github import update_global_commit_status_check_for_tests +from pipelines.helpers.utils import fail_if_missing_docker_hub_creds @click.command(cls=DaggerPipelineCommand, help="Test all the selected connectors.") @@ -49,6 +50,8 @@ def test( Args: ctx (click.Context): The click context. 
""" + if ctx.obj["is_ci"]: + fail_if_missing_docker_hub_creds(ctx) if ctx.obj["is_ci"] and ctx.obj["pull_request"] and ctx.obj["pull_request"].draft: main_logger.info("Skipping connectors tests for draft pull request.") sys.exit(0) @@ -80,6 +83,10 @@ def test( fast_tests_only=fast_tests_only, code_tests_only=code_tests_only, use_local_cdk=ctx.obj.get("use_local_cdk"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), + docker_hub_username=ctx.obj.get("docker_hub_username"), + docker_hub_password=ctx.obj.get("docker_hub_password"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py index 7c857bf617a4..755e2905744c 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/upgrade_base_image/commands.py @@ -8,28 +8,17 @@ from pipelines.airbyte_ci.connectors.migrate_to_base_image.pipeline import run_connector_base_image_upgrade_pipeline from pipelines.airbyte_ci.connectors.pipeline import run_connectors_pipelines from pipelines.cli.dagger_pipeline_command import DaggerPipelineCommand +from pipelines.helpers.utils import fail_if_missing_docker_hub_creds @click.command(cls=DaggerPipelineCommand, short_help="Upgrades the base image version used by the selected connectors.") @click.option("--set-if-not-exists", default=True) -@click.option( - "--docker-hub-username", - help="Your username to connect to DockerHub to read the registries.", - type=click.STRING, - required=True, - envvar="DOCKER_HUB_USERNAME", -) -@click.option( - "--docker-hub-password", - help="Your password to connect to DockerHub to read the registries.", - type=click.STRING, - required=True, - envvar="DOCKER_HUB_PASSWORD", -) @click.pass_context def upgrade_base_image(ctx: click.Context, set_if_not_exists: bool, docker_hub_username: str, docker_hub_password: str) -> bool: """Upgrades the base image version used by the selected connectors.""" + fail_if_missing_docker_hub_creds(ctx) + connectors_contexts = [ ConnectorContext( pipeline_name=f"Upgrade base image versions of connector {connector.technical_name}", @@ -48,8 +37,10 @@ def upgrade_base_image(ctx: click.Context, set_if_not_exists: bool, docker_hub_u ci_git_user=ctx.obj["ci_git_user"], ci_github_access_token=ctx.obj["ci_github_access_token"], open_report_in_browser=False, - docker_hub_username=docker_hub_username, - docker_hub_password=docker_hub_password, + docker_hub_username=ctx.obj.get("docker_hub_username"), + docker_hub_password=ctx.obj.get("docker_hub_password"), + s3_build_cache_access_key_id=ctx.obj.get("s3_build_cache_access_key_id"), + s3_build_cache_secret_key=ctx.obj.get("s3_build_cache_secret_key"), ) for connector in ctx.obj["selected_connectors_with_modified_files"] ] diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index 924fc8807fcf..b2b383f59ab7 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -26,7 +26,10 @@ class GradleTask(Step, ABC): mount_connector_secrets (bool): Whether to mount connector secrets. 
""" - DEFAULT_GRADLE_TASK_OPTIONS = ("--no-daemon", "--scan", "--build-cache", "--console=plain") + DEFAULT_GRADLE_TASK_OPTIONS = ("--no-daemon", "--no-watch-fs", "--scan", "--build-cache", "--console=plain") + LOCAL_MAVEN_REPOSITORY_PATH = "/root/.m2" + GRADLE_DEP_CACHE_PATH = "/root/gradle-cache" + GRADLE_HOME_PATH = "/root/.gradle" gradle_task_name: ClassVar[str] bind_to_docker_host: ClassVar[bool] = False @@ -36,11 +39,9 @@ def __init__(self, context: PipelineContext) -> None: super().__init__(context) @property - def connector_java_build_cache(self) -> CacheVolume: - # TODO: remove this once we finish the project to boost source-postgres CI performance. - # We should use a static gradle-cache volume name. - cache_volume_name = hacks.get_gradle_cache_volume_name(self.context, self.logger) - return self.context.dagger_client.cache_volume(cache_volume_name) + def dependency_cache_volume(self) -> CacheVolume: + """This cache volume is for sharing gradle dependencies (jars and poms) across all pipeline runs.""" + return self.context.dagger_client.cache_volume("gradle-dependency-cache") @property def build_include(self) -> List[str]: @@ -56,15 +57,8 @@ def build_include(self) -> List[str]: for dependency_directory in self.context.connector.get_local_dependency_paths(with_test_dependencies=True) ] - def _get_gradle_command(self, task: str) -> List[str]: - return sh_dash_c( - [ - # The gradle command is chained in between a couple of rsyncs which load from- and store to the cache volume. - "(rsync -a --stats /root/gradle-cache/ /root/.gradle || true)", - f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS)} {task}", - "(rsync -a --stats /root/.gradle/ /root/gradle-cache || true)", - ] - ) + def _get_gradle_command(self, task: str, *args) -> str: + return f"./gradlew {' '.join(self.DEFAULT_GRADLE_TASK_OPTIONS + args)} {task}" async def _run(self) -> StepResult: include = [ @@ -85,7 +79,6 @@ async def _run(self) -> StepResult: "tools/lib/lib.sh", "tools/gradle/codestyle", "pyproject.toml", - "airbyte-cdk/java/airbyte-cdk/**", ] + self.build_include yum_packages_to_install = [ @@ -97,12 +90,17 @@ async def _run(self) -> StepResult: "rsync", # required for gradle cache synchronization. ] - # Define a gradle container which will be cached and re-used for all tasks. - # We should do our best to cram any generic & expensive layers in here. - gradle_container = ( + # Common base container. + gradle_container_base = ( self.dagger_client.container() # Use a linux+jdk base image with long-term support, such as amazoncorretto. .from_(AMAZONCORRETTO_IMAGE) + # Mount the dependency cache volume, but not to $GRADLE_HOME, because gradle doesn't expect concurrent modifications. + .with_mounted_cache(self.GRADLE_DEP_CACHE_PATH, self.dependency_cache_volume, sharing=CacheSharingMode.LOCKED) + # Set GRADLE_HOME to the directory which will be rsync-ed with the gradle cache volume. + .with_env_variable("GRADLE_HOME", self.GRADLE_HOME_PATH) + # Same for GRADLE_USER_HOME. + .with_env_variable("GRADLE_USER_HOME", self.GRADLE_HOME_PATH) # Install a bunch of packages as early as possible. .with_exec( sh_dash_c( @@ -120,36 +118,73 @@ async def _run(self) -> StepResult: ] ) ) - # Set GRADLE_HOME and GRADLE_USER_HOME to the directory which will be rsync-ed with the gradle cache volume. - .with_env_variable("GRADLE_HOME", "/root/.gradle") - .with_env_variable("GRADLE_USER_HOME", "/root/.gradle") # Set RUN_IN_AIRBYTE_CI to tell gradle how to configure its build cache. 
# This is consumed by settings.gradle in the repo root. .with_env_variable("RUN_IN_AIRBYTE_CI", "1") + # Disable the Ryuk container because it needs privileged docker access which it can't have. + .with_env_variable("TESTCONTAINERS_RYUK_DISABLED", "true") + # Set the current working directory. + .with_workdir("/airbyte") # TODO: remove this once we finish the project to boost source-postgres CI performance. .with_env_variable("CACHEBUSTER", hacks.get_cachebuster(self.context, self.logger)) - # Mount the gradle cache volume. - # We deliberately don't mount it at $GRADLE_HOME, instead we load it there and store it from there using rsync. - # This is because the volume is accessed concurrently by all GradleTask instances. - # Hence, why we synchronize the writes by setting the `sharing` parameter to LOCKED. - .with_mounted_cache("/root/gradle-cache", self.connector_java_build_cache, sharing=CacheSharingMode.LOCKED) - # Mount the parts of the repo which interest us in /airbyte. - .with_workdir("/airbyte") + ) + + # Augment the base container with S3 build cache secrets when available. + if self.context.s3_build_cache_access_key_id: + gradle_container_base = gradle_container_base.with_secret_variable( + "S3_BUILD_CACHE_ACCESS_KEY_ID", self.context.s3_build_cache_access_key_id_secret + ) + if self.context.s3_build_cache_secret_key: + gradle_container_base = gradle_container_base.with_secret_variable( + "S3_BUILD_CACHE_SECRET_KEY", self.context.s3_build_cache_secret_key_secret + ) + + # Running a gradle task like "help" with these arguments will trigger updating all dependencies. + # When the cache is cold, this downloads many gigabytes of jars and poms from all over the internet. + warm_dependency_cache_args = ["--write-verification-metadata", "sha256", "--dry-run"] + if self.context.is_local: + # When running locally, this dependency update is slower and less useful than within a CI runner. Skip it. + warm_dependency_cache_args = ["--dry-run"] + + # Mount the whole git repo to update the cache volume contents and build the CDK. + with_whole_git_repo = ( + gradle_container_base + # Mount the whole repo. + .with_directory("/airbyte", self.context.get_repo_dir(".")) + # Update the cache in place by executing a gradle task which will update all dependencies and build the CDK. + .with_exec( + sh_dash_c( + [ + # Ensure that the .m2 directory exists. + f"mkdir -p {self.LOCAL_MAVEN_REPOSITORY_PATH}", + # Load from the cache volume. + f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", + # Resolve all dependencies and write their checksums to './gradle/verification-metadata.dryrun.xml'. + self._get_gradle_command("help", *warm_dependency_cache_args), + # Build the CDK and publish it to the local maven repository. + self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded"), + # Store to the cache volume. + f"(rsync -a --stats {self.GRADLE_HOME_PATH}/ {self.GRADLE_DEP_CACHE_PATH} || true)", + ] + ) + ) + ) + + # Mount only the code needed to build the connector. + gradle_container = ( + gradle_container_base + # Copy the local maven repository and force evaluation of `with_whole_git_repo` container. + .with_directory(self.LOCAL_MAVEN_REPOSITORY_PATH, await with_whole_git_repo.directory(self.LOCAL_MAVEN_REPOSITORY_PATH)) + # Mount the connector-agnostic whitelisted files in the git repo. 
.with_mounted_directory("/airbyte", self.context.get_repo_dir(".", include=include)) + # Mount the sources for the connector and its dependencies in the git repo. .with_mounted_directory(str(self.context.connector.code_directory), await self.context.get_connector_dir()) - # Disable the Ryuk container because it needs privileged docker access that does not work: - .with_env_variable("TESTCONTAINERS_RYUK_DISABLED", "true") - # Run gradle once to populate the container's local maven repository. - # This step is useful also to serve as a basic sanity check and to warm the gradle cache. - # This will download gradle itself, a bunch of poms and jars, compile the gradle plugins, configure tasks, etc. - .with_exec(self._get_gradle_command(":airbyte-cdk:java:airbyte-cdk:publishSnapshotIfNeeded")) ) # From this point on, we add layers which are task-dependent. if self.mount_connector_secrets: - gradle_container = gradle_container.with_( - await secrets.mounted_connector_secrets(self.context, f"{self.context.connector.code_directory}/secrets") - ) + secrets_dir = f"{self.context.connector.code_directory}/secrets" + gradle_container = gradle_container.with_(await secrets.mounted_connector_secrets(self.context, secrets_dir)) if self.bind_to_docker_host: # If this GradleTask subclass needs docker, then install it and bind it to the existing global docker host container. gradle_container = pipelines.dagger.actions.system.docker.with_bound_docker_host(self.context, gradle_container) @@ -158,5 +193,14 @@ async def _run(self) -> StepResult: # Run the gradle task that we actually care about. connector_task = f":airbyte-integrations:connectors:{self.context.connector.technical_name}:{self.gradle_task_name}" - gradle_container = gradle_container.with_exec(self._get_gradle_command(connector_task)) + gradle_container = gradle_container.with_exec( + sh_dash_c( + [ + # Warm the gradle cache. + f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", + # Run the gradle task. 
+ self._get_gradle_command(connector_task, f"-Ds3BuildCachePrefix={self.context.connector.technical_name}"), + ] + ) + ) return await self.get_step_result(gradle_container) diff --git a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py index 7da72b02ee66..5256351700e8 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py +++ b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py @@ -173,6 +173,8 @@ def get_modified_files( envvar="GCP_GSM_CREDENTIALS", ) @click.option("--ci-job-key", envvar="CI_JOB_KEY", type=str) +@click.option("--s3-build-cache-access-key-id", envvar="S3_BUILD_CACHE_ACCESS_KEY_ID", type=str) +@click.option("--s3-build-cache-secret-key", envvar="S3_BUILD_CACHE_SECRET_KEY", type=str) @click.option("--show-dagger-logs/--hide-dagger-logs", default=False, type=bool) @click.pass_context @track_command @@ -191,6 +193,8 @@ def airbyte_ci( ci_report_bucket_name: str, ci_gcs_credentials: str, ci_job_key: str, + s3_build_cache_access_key_id: str, + s3_build_cache_secret_key: str, show_dagger_logs: bool, ): # noqa D103 ctx.ensure_object(dict) @@ -209,6 +213,8 @@ def airbyte_ci( ctx.obj["ci_git_user"] = ci_git_user ctx.obj["ci_github_access_token"] = ci_github_access_token ctx.obj["ci_job_key"] = ci_job_key + ctx.obj["s3_build_cache_access_key_id"] = s3_build_cache_access_key_id + ctx.obj["s3_build_cache_secret_key"] = s3_build_cache_secret_key ctx.obj["pipeline_start_timestamp"] = pipeline_start_timestamp ctx.obj["show_dagger_logs"] = show_dagger_logs diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py index b497832c9181..1d1385403603 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py +++ b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/secrets.py @@ -6,7 +6,7 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING, Callable, Optional from dagger import Container, Secret from pipelines.helpers.utils import get_file_contents, get_secret_host_variable diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/system/docker.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/system/docker.py index 44e940dcf1da..fc67df6e0270 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/system/docker.py +++ b/airbyte-ci/connectors/pipelines/pipelines/dagger/actions/system/docker.py @@ -4,24 +4,28 @@ import json import uuid -from typing import Callable +from typing import Callable, Optional -from dagger import Client, Container, File +from dagger import Client, Container, File, Secret from pipelines import consts from pipelines.airbyte_ci.connectors.context import ConnectorContext, PipelineContext from pipelines.consts import DOCKER_HOST_NAME, DOCKER_HOST_PORT, DOCKER_TMP_VOLUME_NAME from pipelines.helpers.utils import sh_dash_c -def with_global_dockerd_service(dagger_client: Client) -> Container: +def with_global_dockerd_service( + dagger_client: Client, docker_hub_username_secret: Optional[Secret] = None, docker_hub_password_secret: Optional[Secret] = None +) -> Container: """Create a container with a docker daemon running. We expose its 2375 port to use it as a docker host for docker-in-docker use cases. Args: dagger_client (Client): The dagger client used to create the container. 
+ docker_hub_username_secret (Optional[Secret]): The DockerHub username secret. + docker_hub_password_secret (Optional[Secret]): The DockerHub password secret. Returns: Container: The container running dockerd as a service """ - return ( + dockerd_container = ( dagger_client.container().from_(consts.DOCKER_DIND_IMAGE) # We set this env var because we need to use a non-default zombie reaper setting. # The reason for this is that by default it will want to set its parent process ID to 1 when reaping. @@ -46,10 +50,15 @@ def with_global_dockerd_service(dagger_client: Client) -> Container: .with_exposed_port(DOCKER_HOST_PORT) # Mount the docker cache volumes. .with_mounted_cache("/tmp", dagger_client.cache_volume(DOCKER_TMP_VOLUME_NAME)) - # Run the docker daemon and bind it to the exposed TCP port. - .with_exec( - ["dockerd", "--log-level=error", f"--host=tcp://0.0.0.0:{DOCKER_HOST_PORT}", "--tls=false"], insecure_root_capabilities=True + ) + if docker_hub_username_secret and docker_hub_password_secret: + dockerd_container = ( + dockerd_container.with_secret_variable("DOCKER_HUB_USERNAME", docker_hub_username_secret) + .with_secret_variable("DOCKER_HUB_PASSWORD", docker_hub_password_secret) + .with_exec(sh_dash_c(["docker login -u $DOCKER_HUB_USERNAME -p $DOCKER_HUB_PASSWORD"]), skip_entrypoint=True) ) + return dockerd_container.with_exec( + ["dockerd", "--log-level=error", f"--host=tcp://0.0.0.0:{DOCKER_HOST_PORT}", "--tls=false"], insecure_root_capabilities=True ) diff --git a/airbyte-ci/connectors/pipelines/pipelines/hacks.py b/airbyte-ci/connectors/pipelines/pipelines/hacks.py index 4cdac0926b1d..1dcbbbdb1647 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/hacks.py +++ b/airbyte-ci/connectors/pipelines/pipelines/hacks.py @@ -108,20 +108,3 @@ def get_cachebuster(context: ConnectorContext, logger: Logger) -> str: ) return str(context.pipeline_start_timestamp) return "0" - - -def get_gradle_cache_volume_name(context: ConnectorContext, logger: Logger) -> str: - """ - This function will return a semi-static gradle cache volume name for connectors in CONNECTORS_WITHOUT_CACHING and a static value for all other connectors. - By semi-static I mean that the gradle cache volume name will change on each pipeline execution but will be the same for all the steps of the pipeline. - This hack is useful to collect unbiased metrics on the CI speed for connectors in CONNECTORS_WITHOUT_CACHING: it guarantees that the gradle cache volume will be empty on each pipeline execution and no remote caching is used. - - Returns: - str: The gradle cache volume name. - """ - if context.connector.technical_name in CONNECTORS_WITHOUT_CACHING: - logger.warning( - f"Getting a fresh gradle cache volume name for {context.connector.technical_name} to not use remote caching. Only used in the context of the CI performance improvements project for {context.connector.technical_name}." 
- ) - return f"gradle-cache-{context.pipeline_start_timestamp}" - return "gradle-cache" diff --git a/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py b/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py index 56f2fc61ebbc..76575eaf5664 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py +++ b/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py @@ -11,13 +11,13 @@ import re import sys import unicodedata -from glob import glob from io import TextIOWrapper from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple import anyio import asyncer +import click from dagger import Client, Config, Container, ExecError, File, ImageLayerCompression, QueryError, Secret from more_itertools import chunked @@ -318,3 +318,10 @@ def transform_strs_to_paths(str_paths: List[str]) -> List[Path]: List[Path]: A list of Path objects. """ return [Path(str_path) for str_path in str_paths] + + +def fail_if_missing_docker_hub_creds(ctx: click.Context): + if ctx.obj["docker_hub_username"] is None or ctx.obj["docker_hub_password"] is None: + raise click.UsageError( + "You need to be logged to DockerHub registry to run this command. Please set DOCKER_HUB_USERNAME and DOCKER_HUB_PASSWORD environment variables." + ) diff --git a/airbyte-ci/connectors/pipelines/pyproject.toml b/airbyte-ci/connectors/pipelines/pyproject.toml index c7499c11b299..6a2eb4115194 100644 --- a/airbyte-ci/connectors/pipelines/pyproject.toml +++ b/airbyte-ci/connectors/pipelines/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pipelines" -version = "2.2.3" +version = "2.2.6" description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines" authors = ["Airbyte "] diff --git a/airbyte-ci/connectors/pipelines/tests/test_commands/test_groups/test_connectors.py b/airbyte-ci/connectors/pipelines/tests/test_commands/test_groups/test_connectors.py index aa38aa5efa61..0420bbef0096 100644 --- a/airbyte-ci/connectors/pipelines/tests/test_commands/test_groups/test_connectors.py +++ b/airbyte-ci/connectors/pipelines/tests/test_commands/test_groups/test_connectors.py @@ -5,10 +5,6 @@ from typing import Callable import click -import pipelines.airbyte_ci.connectors.build_image.commands -import pipelines.airbyte_ci.connectors.commands -import pipelines.airbyte_ci.connectors.publish.commands -import pipelines.airbyte_ci.connectors.test.commands import pytest from click.testing import CliRunner from connector_ops.utils import METADATA_FILE_NAME, ConnectorLanguage @@ -250,6 +246,8 @@ def click_context_obj(): "concurrency": 1, "ci_git_user": None, "ci_github_access_token": None, + "docker_hub_username": "foo", + "docker_hub_password": "bar", } @@ -268,10 +266,6 @@ def click_context_obj(): "test", "--metadata-service-bucket-name", "test", - "--docker-hub-username", - "test", - "--docker-hub-password", - "test", ], ), (connectors_build_command.build, []), @@ -293,7 +287,7 @@ def test_commands_do_not_override_connector_selection( mocker.patch.object(connectors_test_command, "ConnectorContext", mock_connector_context) mocker.patch.object(connectors_build_command, "ConnectorContext", mock_connector_context) mocker.patch.object(connectors_publish_command, "PublishConnectorContext", mock_connector_context) - runner.invoke(command, command_args, catch_exceptions=False, obj=click_context_obj) + runner.invoke(command, command_args, catch_exceptions=True, obj=click_context_obj) assert 
mock_connector_context.call_count == 1 # If the connector selection is overriden the context won't be instantiated with the selected connector mock instance assert mock_connector_context.call_args_list[0].kwargs["connector"] == selected_connector diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile index 2fd12c95e0dc..ef014d6411f0 100644 --- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile @@ -25,5 +25,5 @@ ENV AIRBYTE_NORMALIZATION_INTEGRATION bigquery COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=2.1.5 +LABEL io.airbyte.version=2.1.6 LABEL io.airbyte.name=airbyte/destination-bigquery diff --git a/airbyte-integrations/connectors/destination-bigquery/metadata.yaml b/airbyte-integrations/connectors/destination-bigquery/metadata.yaml index 985dfa23dade..e2bf97625512 100644 --- a/airbyte-integrations/connectors/destination-bigquery/metadata.yaml +++ b/airbyte-integrations/connectors/destination-bigquery/metadata.yaml @@ -2,7 +2,7 @@ data: connectorSubtype: database connectorType: destination definitionId: 22f6c74f-5699-40ff-833c-4a879ea40133 - dockerImageTag: 2.1.5 + dockerImageTag: 2.1.6 dockerRepository: airbyte/destination-bigquery githubIssueLabel: destination-bigquery icon: bigquery.svg diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java index 674aa5296026..1e1946fe42cc 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryDestination.java @@ -373,23 +373,20 @@ private SerializedAirbyteMessageConsumer getStandardRecordConsumer(final BigQuer return new BigQueryRecordStandardConsumer( outputRecordCollector, () -> { - final boolean use1s1t = TypingAndDedupingFlag.isDestinationV2(); - if (use1s1t) { - // Set up our raw tables - writeConfigs.get().forEach((streamId, uploader) -> { - final StreamConfig stream = parsedCatalog.getStream(streamId); - if (stream.destinationSyncMode() == DestinationSyncMode.OVERWRITE) { - // For streams in overwrite mode, truncate the raw table. - // non-1s1t syncs actually overwrite the raw table at the end of the sync, so we only do this in - // 1s1t mode. - final TableId rawTableId = TableId.of(stream.id().rawNamespace(), stream.id().rawName()); - bigquery.delete(rawTableId); - BigQueryUtils.createPartitionedTableIfNotExists(bigquery, rawTableId, DefaultBigQueryRecordFormatter.SCHEMA_V2); - } else { - uploader.createRawTable(); - } - }); - } + // Set up our raw tables + writeConfigs.get().forEach((streamId, uploader) -> { + final StreamConfig stream = parsedCatalog.getStream(streamId); + if (stream.destinationSyncMode() == DestinationSyncMode.OVERWRITE) { + // For streams in overwrite mode, truncate the raw table. + // non-1s1t syncs actually overwrite the raw table at the end of the sync, so we only do this in + // 1s1t mode. 
+ final TableId rawTableId = TableId.of(stream.id().rawNamespace(), stream.id().rawName()); + bigquery.delete(rawTableId); + BigQueryUtils.createPartitionedTableIfNotExists(bigquery, rawTableId, DefaultBigQueryRecordFormatter.SCHEMA_V2); + } else { + uploader.createRawTable(); + } + }); }, (hasFailed) -> { try { diff --git a/airbyte-integrations/connectors/source-harness/.dockerignore b/airbyte-integrations/connectors/source-harness/.dockerignore new file mode 100644 index 000000000000..40467f139afd --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_harness +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-klaviyo/Dockerfile b/airbyte-integrations/connectors/source-harness/Dockerfile similarity index 89% rename from airbyte-integrations/connectors/source-klaviyo/Dockerfile rename to airbyte-integrations/connectors/source-harness/Dockerfile index a8963212602f..8542d6eca698 100644 --- a/airbyte-integrations/connectors/source-klaviyo/Dockerfile +++ b/airbyte-integrations/connectors/source-harness/Dockerfile @@ -29,10 +29,10 @@ RUN apk --no-cache add bash # copy payload code only COPY main.py ./ -COPY source_klaviyo ./source_klaviyo +COPY source_harness ./source_harness ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.4.0 -LABEL io.airbyte.name=airbyte/source-klaviyo +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-harness diff --git a/airbyte-integrations/connectors/source-harness/README.md b/airbyte-integrations/connectors/source-harness/README.md new file mode 100644 index 000000000000..31e5b516fdee --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/README.md @@ -0,0 +1,82 @@ +# Harness Source + +This is the repository for the Harness configuration based source connector. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/harness). + +## Local development + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-harness:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/harness) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_harness/spec.yaml` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source harness test creds` +and place them into `secrets/config.json`. + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . 
-t airbyte/source-harness:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-harness:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-harness:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-harness:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-harness:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-harness:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +To run your integration tests with Docker, run: +``` +./acceptance-test-docker.sh +``` + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-harness:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-harness:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-harness/__init__.py b/airbyte-integrations/connectors/source-harness/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-harness/acceptance-test-config.yml b/airbyte-integrations/connectors/source-harness/acceptance-test-config.yml new file mode 100644 index 000000000000..28456c8a61fb --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/acceptance-test-config.yml @@ -0,0 +1,39 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-harness:dev +acceptance_tests: + spec: + tests: + - spec_path: "source_harness/spec.yaml" + connection: + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + tests: + - config_path: "secrets/config.json" + basic_read: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + # TODO uncomment this block to specify that the tests should assert the connector outputs the records provided in the input file + # expect_records: + # path: "integration_tests/expected_records.jsonl" + # extra_fields: no + # exact_order: no + # extra_records: yes + incremental: + bypass_reason: "This connector does not implement incremental sync" + # TODO uncomment this block if your connector implements incremental sync: + # tests: + # - config_path: "secrets/config.json" + # configured_catalog_path: "integration_tests/configured_catalog.json" + # future_state: + # future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-harness/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-harness/acceptance-test-docker.sh new file mode 100755 index 000000000000..b6d65deeccb4 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/acceptance-test-docker.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh + +source "$(git rev-parse --show-toplevel)/airbyte-integrations/bases/connector-acceptance-test/acceptance-test-docker.sh" diff --git a/airbyte-integrations/connectors/source-harness/icon.svg b/airbyte-integrations/connectors/source-harness/icon.svg new file mode 100644 index 000000000000..e1770dde603c --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/icon.svg @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/airbyte-integrations/connectors/source-harness/integration_tests/__init__.py b/airbyte-integrations/connectors/source-harness/integration_tests/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+# diff --git a/airbyte-integrations/connectors/source-harness/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-harness/integration_tests/abnormal_state.json new file mode 100644 index 000000000000..52b0f2c2118f --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-harness/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-harness/integration_tests/acceptance.py new file mode 100644 index 000000000000..9e6409236281 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-harness/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-harness/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..4cf0f64f27b0 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/integration_tests/configured_catalog.json @@ -0,0 +1,13 @@ +{ + "streams": [ + { + "stream": { + "name": "organizations", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-harness/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-harness/integration_tests/invalid_config.json new file mode 100644 index 000000000000..f0d7d0c01c14 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/integration_tests/invalid_config.json @@ -0,0 +1,5 @@ +{ + "api_key": "", + "account_id": "xxxxxxxxxxxxxxxxx", + "api_url": "https://app.harness.io" +} diff --git a/airbyte-integrations/connectors/source-harness/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-harness/integration_tests/sample_config.json new file mode 100644 index 000000000000..b792a18806ff --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/integration_tests/sample_config.json @@ -0,0 +1,5 @@ +{ + "api_key": "xxxxxxxxxxxxxxxxxxxxxxxxxxx", + "account_id": "xxxxxxxxxxxxxxxxx", + "api_url": "https://app.harness.io" +} diff --git a/airbyte-integrations/connectors/source-harness/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-harness/integration_tests/sample_state.json new file mode 100644 index 000000000000..3587e579822d --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-harness/main.py b/airbyte-integrations/connectors/source-harness/main.py new file mode 100644 index 000000000000..b323465b96c8 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_harness import SourceHarness + +if __name__ == "__main__": + source = SourceHarness() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-harness/metadata.yaml b/airbyte-integrations/connectors/source-harness/metadata.yaml new file mode 100644 index 000000000000..857504fc2505 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/metadata.yaml @@ -0,0 +1,25 @@ +data: + allowedHosts: + hosts: + - api.harness.io + registries: + oss: + enabled: false + cloud: + enabled: false + connectorSubtype: api + connectorType: source + definitionId: b0e46f61-e143-47cc-a595-4bb73bfa8a15 + dockerImageTag: 0.1.0 + dockerRepository: airbyte/source-harness + githubIssueLabel: source-harness + icon: harness.svg + license: MIT + name: Harness + releaseDate: 2023-10-10 + releaseStage: alpha + supportLevel: community + documentationUrl: https://docs.airbyte.com/integrations/sources/harness + tags: + - language:lowcode +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-harness/requirements.txt b/airbyte-integrations/connectors/source-harness/requirements.txt new file mode 100644 index 000000000000..cc57334ef619 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/connector-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-harness/setup.py b/airbyte-integrations/connectors/source-harness/setup.py new file mode 100644 index 000000000000..6bef3ce1447c --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/setup.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk"] + +TEST_REQUIREMENTS = [ + "requests-mock~=1.9.3", + "pytest~=6.2", + "pytest-mock~=3.6.1", +] + +setup( + name="source_harness", + description="Source implementation for Harness.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-harness/source_harness/__init__.py b/airbyte-integrations/connectors/source-harness/source_harness/__init__.py new file mode 100644 index 000000000000..1af39ecab1b1 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/source_harness/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceHarness + +__all__ = ["SourceHarness"] diff --git a/airbyte-integrations/connectors/source-harness/source_harness/manifest.yaml b/airbyte-integrations/connectors/source-harness/source_harness/manifest.yaml new file mode 100644 index 000000000000..e01c0d3bd201 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/source_harness/manifest.yaml @@ -0,0 +1,46 @@ +version: "0.29.0" + +definitions: + selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: ["data", "content"] + requester: + type: HttpRequester + url_base: "{{ config['api_url'] }}" + http_method: "GET" + authenticator: + type: "ApiKeyAuthenticator" + header: "x-api-key" + api_token: "{{ config['api_key'] }}" + request_parameters: + accountIdentifier: "{{ config['account_id'] }}" + + retriever: + type: SimpleRetriever + record_selector: + $ref: "#/definitions/selector" + paginator: + type: NoPagination + requester: + $ref: "#/definitions/requester" + + base_stream: + type: DeclarativeStream + retriever: + $ref: "#/definitions/retriever" + + organizations_stream: + $ref: "#/definitions/base_stream" + name: "organizations" + $parameters: + path: "/ng/api/organizations" + +streams: + - "#/definitions/organizations_stream" + +check: + type: CheckStream + stream_names: + - "organizations" diff --git a/airbyte-integrations/connectors/source-harness/source_harness/schemas/organizations.json b/airbyte-integrations/connectors/source-harness/source_harness/schemas/organizations.json new file mode 100644 index 000000000000..88372907ea2b --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/source_harness/schemas/organizations.json @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Organizations schema", + "additionalProperties": true, + "type": ["object", "null"], + "properties": { + "organization": { + "type": ["object", "null"], + "properties": { + "identifier": { + "type": ["string", "null"] + }, + "name": { + "type": ["string", "null"] + }, + "description": { + "type": ["string", "null"] + }, + "tags": { + "type": ["object", "null"], + "properties": { + "identifier": { + "type": ["string", "null"] + } + } + } + } + }, + "createdAt": { + "type": ["number", "null"] + }, + "lastModifiedAt": { + "type": ["number", "null"] + }, + "harnessManaged": { + "type": ["boolean", "null"] + } + } +} diff --git a/airbyte-integrations/connectors/source-harness/source_harness/source.py b/airbyte-integrations/connectors/source-harness/source_harness/source.py new file mode 100644 index 000000000000..a52f4c06db86 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/source_harness/source.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource + +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. + +WARNING: Do not modify this file. 
+""" + + +# Declarative Source +class SourceHarness(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "manifest.yaml"}) diff --git a/airbyte-integrations/connectors/source-harness/source_harness/spec.yaml b/airbyte-integrations/connectors/source-harness/source_harness/spec.yaml new file mode 100644 index 000000000000..d0bcc389d5f7 --- /dev/null +++ b/airbyte-integrations/connectors/source-harness/source_harness/spec.yaml @@ -0,0 +1,25 @@ +documentationUrl: https://docs.airbyte.com/integrations/sources/harness +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: Harness Spec + type: object + required: + - api_key + - account_id + additionalProperties: true + properties: + api_key: + type: string + title: API key + airbyte_secret: true + account_id: + type: string + title: Account ID + description: Harness Account ID + api_url: + type: string + title: API URL + description: The API URL for fetching data from Harness + default: https://app.harness.io + examples: + - https://my-harness-server.example.com diff --git a/airbyte-integrations/connectors/source-klarna/Dockerfile b/airbyte-integrations/connectors/source-klarna/Dockerfile index ce2d2878dd50..68bdf79029bc 100644 --- a/airbyte-integrations/connectors/source-klarna/Dockerfile +++ b/airbyte-integrations/connectors/source-klarna/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9.13-alpine3.15 as base +FROM python:3.9.11-alpine3.15 as base # build and load all requirements FROM base as builder @@ -34,5 +34,5 @@ COPY source_klarna ./source_klarna ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.2.0 LABEL io.airbyte.name=airbyte/source-klarna diff --git a/airbyte-integrations/connectors/source-klarna/README.md b/airbyte-integrations/connectors/source-klarna/README.md index e8c59a8d0a8f..b62a3b49d3e0 100644 --- a/airbyte-integrations/connectors/source-klarna/README.md +++ b/airbyte-integrations/connectors/source-klarna/README.md @@ -1,35 +1,10 @@ # Klarna Source -This is the repository for the Klarna source connector, written in Python. -For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/klarna). +This is the repository for the Klarna configuration based source connector. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/klarna). ## Local development -### Prerequisites -**To iterate on this connector, make sure to complete this prerequisites section.** - -#### Minimum Python version required `= 3.9.0` - -#### Build & Activate Virtual Environment and install dependencies -From this connector directory, create a virtual environment: -``` -python -m venv .venv -``` - -This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your -development environment of choice. To activate it from the terminal, run: -``` -source .venv/bin/activate -pip install -r requirements.txt -pip install '.[tests]' -``` -If you are in an IDE, follow your IDE's instructions to activate the virtualenv. - -Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is -used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. 
-If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything -should work as you expect. - #### Building via Gradle You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. @@ -39,7 +14,7 @@ To build using Gradle, from the Airbyte repository root, run: ``` #### Create credentials -**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/klarna) +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/klarna) to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_klarna/spec.yaml` file. Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. See `integration_tests/sample_config.json` for a sample config file. @@ -47,14 +22,6 @@ See `integration_tests/sample_config.json` for a sample config file. **If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source klarna test creds` and place them into `secrets/config.json`. -### Locally running the connector -``` -python main.py spec -python main.py check --config secrets/config.json -python main.py discover --config secrets/config.json -python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json -``` - ### Locally running the connector docker image #### Build @@ -79,32 +46,15 @@ docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-klarna:dev discover -- docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-klarna:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json ``` ## Testing -Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. -First install test dependencies into your virtual environment: -``` -pip install .[tests] -``` -### Unit Tests -To run unit tests locally, from the connector directory run: -``` -python -m pytest unit_tests -``` -### Integration Tests -There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). -#### Custom Integration tests -Place custom tests inside `integration_tests/` folder, then, from the connector root, run -``` -python -m pytest integration_tests -``` #### Acceptance Tests -Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. 
-To run your integration tests with acceptance tests, from the connector root, run + +To run your integration tests with Docker, run: ``` -python -m pytest integration_tests -p integration_tests.acceptance +./acceptance-test-docker.sh ``` -To run your integration tests with docker ### Using gradle to run tests All commands should be run from airbyte project root. diff --git a/airbyte-integrations/connectors/source-klarna/__init__.py b/airbyte-integrations/connectors/source-klarna/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-klarna/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-klarna/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-klarna/acceptance-test-docker.sh index 5797d20fe9a7..b6d65deeccb4 100644 --- a/airbyte-integrations/connectors/source-klarna/acceptance-test-docker.sh +++ b/airbyte-integrations/connectors/source-klarna/acceptance-test-docker.sh @@ -1,2 +1,3 @@ #!/usr/bin/env sh + source "$(git rev-parse --show-toplevel)/airbyte-integrations/bases/connector-acceptance-test/acceptance-test-docker.sh" diff --git a/airbyte-integrations/connectors/source-klarna/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-klarna/integration_tests/acceptance.py index 82823254d266..9e6409236281 100644 --- a/airbyte-integrations/connectors/source-klarna/integration_tests/acceptance.py +++ b/airbyte-integrations/connectors/source-klarna/integration_tests/acceptance.py @@ -11,4 +11,6 @@ @pytest.fixture(scope="session", autouse=True) def connector_setup(): """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. 
otherwise remove the TODO comments yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-klarna/metadata.yaml b/airbyte-integrations/connectors/source-klarna/metadata.yaml index 6dc2f5b3bdce..8eff9ee1fc7b 100644 --- a/airbyte-integrations/connectors/source-klarna/metadata.yaml +++ b/airbyte-integrations/connectors/source-klarna/metadata.yaml @@ -1,24 +1,31 @@ data: + allowedHosts: + hosts: + - api.klarna.com + - api.playground.klarna.com + - api-${config.region}.klarna.com + - api-${config.region}.playground.klarna.com + registries: + oss: + enabled: true + cloud: + enabled: true connectorSubtype: api connectorType: source definitionId: 60c24725-00ae-490c-991d-55b78c3197e0 - dockerImageTag: 0.1.0 + dockerImageTag: 0.2.0 dockerRepository: airbyte/source-klarna githubIssueLabel: source-klarna icon: klarna.svg license: MIT name: Klarna - registries: - cloud: - enabled: true - oss: - enabled: true + releaseDate: 2022-10-24 releaseStage: alpha + supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/klarna tags: - - language:python + - language:low-code ab_internal: sl: 100 ql: 100 - supportLevel: community metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-klarna/setup.py b/airbyte-integrations/connectors/source-klarna/setup.py index 046558436d0f..a4742e88dd56 100644 --- a/airbyte-integrations/connectors/source-klarna/setup.py +++ b/airbyte-integrations/connectors/source-klarna/setup.py @@ -5,13 +5,14 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.2", ""] +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] TEST_REQUIREMENTS = [ "requests-mock~=1.9.3", - "pytest~=6.1", + "pytest~=6.2", "pytest-mock~=3.6.1", - "responses~=0.22.0", ] setup( diff --git a/airbyte-integrations/connectors/source-klarna/source_klarna/manifest.yaml b/airbyte-integrations/connectors/source-klarna/source_klarna/manifest.yaml new file mode 100644 index 000000000000..1f6d1db32a83 --- /dev/null +++ b/airbyte-integrations/connectors/source-klarna/source_klarna/manifest.yaml @@ -0,0 +1,117 @@ +version: "0.29.0" + +type: DeclarativeSource +check: + type: CheckStream + stream_names: + - payouts +streams: + - type: DeclarativeStream + name: payouts + primary_key: + - payout_date + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://api{{ '-' + config.region if config.region != 'eu' }}.{{ 'playground.' if config.playground }}klarna.com/ + path: /settlements/v1/payouts + http_method: GET + request_parameters: {} + request_headers: {} + authenticator: + type: BasicHttpAuthenticator + username: "{{ config['username'] }}" + password: "{{ config['password'] }}" + request_body_json: {} + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - payouts + paginator: + type: DefaultPaginator + page_token_option: + type: RequestPath + page_size_option: + inject_into: request_parameter + type: RequestOption + field_name: size + pagination_strategy: + type: CursorPagination + page_size: 500 + cursor_value: '{{ response.get("pagination", {}).get("next", {}) }}' + stop_condition: '{{ not response.get("pagination", {}).get("next", {}) }}' + - type: DeclarativeStream + name: transactions + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: >- + https://api{{ '-'+config.region if config.region != 'eu' }}.{{ + 'playground.' 
if config.playground }}klarna.com/ + path: /settlements/v1/transactions + http_method: GET + request_parameters: {} + request_headers: {} + authenticator: + type: BasicHttpAuthenticator + username: "{{ config['username'] }}" + password: "{{ config['password'] }}" + request_body_json: {} + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - transactions + paginator: + type: DefaultPaginator + page_token_option: + type: RequestPath + page_size_option: + inject_into: request_parameter + type: RequestOption + field_name: size + pagination_strategy: + type: CursorPagination + page_size: 500 + cursor_value: '{{ response.get("pagination", {}).get("next", {}) }}' + stop_condition: '{{ not response.get("pagination", {}).get("next", {}) }}' +spec: + documentation_url: https://docs.airbyte.com/integrations/sources/klarna + connection_specification: + $schema: http://json-schema.org/draft-07/schema# + title: Klarna Spec + type: object + required: + - region + - playground + - username + - password + additionalProperties: true + properties: + region: + title: Region + type: string + enum: + - eu + - us + - oc + description: Base url region (For playground eu https://docs.klarna.com/klarna-payments/api/payments-api/#tag/API-URLs). Supported 'eu', 'us', 'oc' + playground: + title: Playground + type: boolean + description: Propertie defining if connector is used against playground or production environment + default: false + username: + title: Username + type: string + description: Consists of your Merchant ID (eid) - a unique number that identifies your e-store, combined with a random string (https://developers.klarna.com/api/#authentication) + password: + title: Password + type: string + description: A string which is associated with your Merchant ID and is used to authorize use of Klarna's APIs (https://developers.klarna.com/api/#authentication) + airbyte_secret: true diff --git a/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/payouts.json b/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/payouts.json index e600cca16b69..ed12942e0f8b 100644 --- a/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/payouts.json +++ b/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/payouts.json @@ -1,17 +1,10 @@ { + "$schema": "https://json-schema.org/draft-07/schema#", "type": "object", "additionalProperties": true, - "required": [ - "totals", - "payment_reference", - "payout_date", - "currency_code", - "merchant_settlement_type", - "merchant_id" - ], "properties": { "totals": { - "type": "object", + "type": ["null", "object"], "additionalProperties": true, "properties": { "commission_amount": { @@ -130,6 +123,5 @@ "example": "https://{settlements_api}/transactions?payment_reference=XISA93DJ", "type": "string" } - }, - "$schema": "http://json-schema.org/schema#" + } } diff --git a/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/transactions.json b/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/transactions.json index 2ab0d30627c1..82b7a0faf690 100644 --- a/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/transactions.json +++ b/airbyte-integrations/connectors/source-klarna/source_klarna/schemas/transactions.json @@ -1,7 +1,7 @@ { + "$schema": "https://json-schema.org/draft-07/schema#", "type": "object", "additionalProperties": true, - "required": ["capture_id", "sale_date", "capture_date", "order_id"], "properties": { "amount": { "description": 
"Total amount of the specific transaction, in minor units", @@ -9,6 +9,20 @@ "type": "integer", "format": "int64" }, + "merchant_id": { + "type": ["null", "string"] + }, + "shipping_address_country": { + "type": ["null", "string"] + }, + "consumer_vat": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "additionalProperties": true, + "properties": {} + } + }, "capture_id": { "description": "The Klarna assigned id reference of a specific capture", "example": "33db6f16-9f43-43fa-a587-cc51411c98e4", @@ -186,6 +200,5 @@ "description": "ISO 4217 Currency Code of the country you are registered in.", "example": "EUR" } - }, - "$schema": "http://json-schema.org/schema#" + } } diff --git a/airbyte-integrations/connectors/source-klarna/source_klarna/source.py b/airbyte-integrations/connectors/source-klarna/source_klarna/source.py index dbc2a848d012..af5eb612a3c6 100644 --- a/airbyte-integrations/connectors/source-klarna/source_klarna/source.py +++ b/airbyte-integrations/connectors/source-klarna/source_klarna/source.py @@ -2,117 +2,17 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource -from abc import ABC -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple -from urllib.parse import parse_qs, urlparse +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. -import requests -from airbyte_cdk.models import SyncMode -from airbyte_cdk.sources import AbstractSource -from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.http import HttpStream -from airbyte_cdk.sources.streams.http.requests_native_auth import BasicHttpAuthenticator +WARNING: Do not modify this file. +""" -# Basic full refresh stream -class KlarnaStream(HttpStream, ABC): - def __init__(self, region: str, playground: bool, authenticator: BasicHttpAuthenticator, **kwargs): - self.region = region - self.playground = playground - self.kwargs = kwargs - super().__init__(authenticator=authenticator) - - page_size = 500 - data_api_field: str - - @property - def url_base(self) -> str: - playground_path = "playground." 
if self.playground else "" - if self.region == "eu": - endpoint = f"https://api.{playground_path}klarna.com/" - else: - endpoint = f"https://api-{self.region}.{playground_path}klarna.com/" - return endpoint - - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: - response_json = response.json() - if "next" in response_json.get("pagination", {}).keys(): - parsed_url = urlparse(response_json["pagination"]["next"]) - query_params = parse_qs(parsed_url.query) - # noinspection PyTypeChecker - return query_params - else: - return None - - def request_params( - self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None - ) -> MutableMapping[str, Any]: - if next_page_token: - return dict(next_page_token) - else: - return {"offset": 0, "size": self.page_size} - - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - """ - :return an iterable containing each record in the response - """ - payouts = response.json().get(self.data_api_field, []) - yield from payouts - - -class Payouts(KlarnaStream): - """ - Payouts read from Klarna Settlements API https://developers.klarna.com/api/?json#settlements-api - """ - - primary_key = "payout_date" # TODO verify - data_api_field = "payouts" - - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: - return "/settlements/v1/payouts" - - -class Transactions(KlarnaStream): - """ - Transactions read from Klarna Settlements API https://developers.klarna.com/api/?json#settlements-api - """ - - primary_key = "capture_id" # TODO verify - data_api_field = "transactions" - - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: - return "/settlements/v1/transactions" - - -# Source -class SourceKlarna(AbstractSource): - def check_connection(self, logger, config) -> Tuple[bool, any]: - """ - :param config: the user-input config object conforming to the connector's spec.yaml - :param logger: logger object - :return Tuple[bool, any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise. - """ - try: - auth = BasicHttpAuthenticator(username=config["username"], password=config["password"]) - conn_test_stream = Transactions(authenticator=auth, **config) - conn_test_stream.page_size = 1 - conn_test_stream.next_page_token = lambda x: None - records = conn_test_stream.read_records(sync_mode=SyncMode.full_refresh) - # Try to read one value from records iterator - next(records, None) - return True, None - except Exception as e: - print(e) - return False, repr(e) - - def streams(self, config: Mapping[str, Any]) -> List[Stream]: - """ - :param config: A Mapping of the user input configuration as defined in the connector spec. 
- """ - auth = BasicHttpAuthenticator(username=config["username"], password=config["password"]) - return [Payouts(authenticator=auth, **config), Transactions(authenticator=auth, **config)] +# Declarative Source +class SourceKlarna(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "manifest.yaml"}) diff --git a/airbyte-integrations/connectors/source-klarna/source_klarna/spec.yaml b/airbyte-integrations/connectors/source-klarna/source_klarna/spec.yaml deleted file mode 100644 index e2d7dfc9c71f..000000000000 --- a/airbyte-integrations/connectors/source-klarna/source_klarna/spec.yaml +++ /dev/null @@ -1,34 +0,0 @@ -documentationUrl: https://docs.airbyte.com/integrations/sources/klarna -connectionSpecification: - $schema: http://json-schema.org/draft-07/schema# - title: Klarna Spec - type: object - required: - - region - - playground - - username - - password - additionalProperties: true - properties: - region: - title: Region - type: string - enum: - - eu - - us - - oc - description: Base url region (For playground eu https://docs.klarna.com/klarna-payments/api/payments-api/#tag/API-URLs). Supported 'eu', 'us', 'oc' - playground: - title: Playground - type: boolean - description: Propertie defining if connector is used against playground or production environment - default: false - username: - title: Username - type: string - description: Consists of your Merchant ID (eid) - a unique number that identifies your e-store, combined with a random string (https://developers.klarna.com/api/#authentication) - password: - title: Password - type: string - description: A string which is associated with your Merchant ID and is used to authorize use of Klarna's APIs (https://developers.klarna.com/api/#authentication) - airbyte_secret: true diff --git a/airbyte-integrations/connectors/source-klarna/unit_tests/conftest.py b/airbyte-integrations/connectors/source-klarna/unit_tests/conftest.py deleted file mode 100644 index d06b5a1cdb35..000000000000 --- a/airbyte-integrations/connectors/source-klarna/unit_tests/conftest.py +++ /dev/null @@ -1,23 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -import pytest as pytest -from airbyte_cdk.sources.streams.http.requests_native_auth import BasicHttpAuthenticator -from source_klarna import SourceKlarna -from source_klarna.source import KlarnaStream - - -@pytest.fixture(name="source_klarna") -def get_source_klarna(): - return SourceKlarna() - - -@pytest.fixture(name="klarna_config") -def get_klarna_config(): - return dict(playground=False, region="eu", username="user", password="password") - - -@pytest.fixture(name="klarna_stream") -def get_klarna_stream(klarna_config): - return KlarnaStream(authenticator=BasicHttpAuthenticator("", ""), **klarna_config) diff --git a/airbyte-integrations/connectors/source-klarna/unit_tests/test_source.py b/airbyte-integrations/connectors/source-klarna/unit_tests/test_source.py deleted file mode 100644 index 2b6b012b5ca2..000000000000 --- a/airbyte-integrations/connectors/source-klarna/unit_tests/test_source.py +++ /dev/null @@ -1,24 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - -from unittest.mock import MagicMock - -import responses -from source_klarna.source import SourceKlarna - - -@responses.activate -def test_check_connection(mocker, source_klarna, klarna_config): - responses.add(responses.GET, "https://api.klarna.com/settlements/v1/transactions?offset=0&size=1", json={}) - - logger_mock, config_mock = MagicMock(), klarna_config - assert source_klarna.check_connection(logger_mock, config_mock) == (True, None) - - -def test_streams(mocker, klarna_config): - source = SourceKlarna() - config_mock = klarna_config - streams = source.streams(config_mock) - expected_streams_number = 2 - assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-klarna/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-klarna/unit_tests/test_streams.py deleted file mode 100644 index ca2f5dc37bcc..000000000000 --- a/airbyte-integrations/connectors/source-klarna/unit_tests/test_streams.py +++ /dev/null @@ -1,92 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -from http import HTTPStatus -from unittest.mock import MagicMock - -import pytest -from airbyte_cdk.sources.streams.http.requests_native_auth import BasicHttpAuthenticator -from source_klarna.source import KlarnaStream, Payouts, Transactions - - -@pytest.fixture -def patch_base_class(mocker): - # Mock abstract methods to enable instantiating abstract class - mocker.patch.object(KlarnaStream, "path", "v0/example_endpoint") - mocker.patch.object(KlarnaStream, "primary_key", "test_primary_key") - mocker.patch.object(KlarnaStream, "__abstractmethods__", set()) - - -def test_request_params(patch_base_class, klarna_stream): - inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None} - expected_params = {"offset": 0, "size": 500} - assert klarna_stream.request_params(**inputs) == expected_params - - -@pytest.mark.parametrize( - "total,count,offset,next_,expected_params", - [ - (9, 4, 0, "https://api.playground.klarna.com/settlements/v1/payouts?offset=4&size=4", {"offset": ["4"], "size": ["4"]}), - (9, 4, 4, "https://api.playground.klarna.com/settlements/v1/payouts?offset=48&size=4", {"offset": ["48"], "size": ["4"]}), - ], -) -def test_next_page_token(patch_base_class, klarna_stream, total, count, offset, next_, expected_params): - response_mock = MagicMock() - response_mock.json.return_value = { - "pagination": { - "total": total, - "count": count, - "offset": offset, - "next": next_, - } - } - inputs = {"response": response_mock} - assert klarna_stream.next_page_token(**inputs) == expected_params - - -@pytest.mark.parametrize( - ("specific_klarna_stream", "response"), - [ - (Payouts, {"payouts": [{}]}), - (Transactions, {"transactions": [{}]}), - ], -) -def test_parse_response(patch_base_class, klarna_config, specific_klarna_stream, response): - mock_response = MagicMock() - mock_response.json.return_value = response - inputs = {"response": mock_response, "stream_state": {}} - stream = specific_klarna_stream(authenticator=BasicHttpAuthenticator("", ""), **klarna_config) - assert next(stream.parse_response(**inputs)) == {} - - -def test_request_headers(patch_base_class, klarna_stream): - inputs = {"stream_slice": None, "stream_state": None, "next_page_token": None} - expected_headers = {} - assert klarna_stream.request_headers(**inputs) == expected_headers - - -def test_http_method(patch_base_class, klarna_stream): - expected_method = "GET" - assert klarna_stream.http_method == expected_method - - -@pytest.mark.parametrize( - 
("http_status", "should_retry"), - [ - (HTTPStatus.OK, False), - (HTTPStatus.BAD_REQUEST, False), - (HTTPStatus.TOO_MANY_REQUESTS, True), - (HTTPStatus.INTERNAL_SERVER_ERROR, True), - ], -) -def test_should_retry(patch_base_class, http_status, should_retry, klarna_stream): - response_mock = MagicMock() - response_mock.status_code = http_status - assert klarna_stream.should_retry(response_mock) == should_retry - - -def test_backoff_time(patch_base_class, klarna_stream): - response_mock = MagicMock() - expected_backoff_time = None - assert klarna_stream.backoff_time(response_mock) == expected_backoff_time diff --git a/airbyte-integrations/connectors/source-klaviyo/acceptance-test-config.yml b/airbyte-integrations/connectors/source-klaviyo/acceptance-test-config.yml index 6115b87777fe..f8ad8f3e3985 100644 --- a/airbyte-integrations/connectors/source-klaviyo/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-klaviyo/acceptance-test-config.yml @@ -18,6 +18,9 @@ acceptance_tests: discovery: tests: - config_path: secrets/config.json + backward_compatibility_tests_config: + disable_for_version: "1.0.0" + previous_connector_version: "0.5.0" full_refresh: tests: - config_path: secrets/config.json diff --git a/airbyte-integrations/connectors/source-klaviyo/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-klaviyo/integration_tests/expected_records.jsonl index dc2a8199ff35..394317227980 100644 --- a/airbyte-integrations/connectors/source-klaviyo/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-klaviyo/integration_tests/expected_records.jsonl @@ -1,10 +1,10 @@ {"stream": "campaigns", "data": {"object": "campaign", "id": "VFaYVy", "name": "Email Campaign 2021-05-16 19:17:45", "subject": "My Test subject", "from_email": "integration-test@airbyte.io", "from_name": "Airbyte", "lists": [{"object": "list", "id": "RnsiHB", "name": "Newsletter", "list_type": "list", "folder": null, "created": "2021-03-31T10:50:36+00:00", "updated": "2021-03-31T10:50:36+00:00", "person_count": 1}, {"object": "list", "id": "TaSce6", "name": "Preview List", "list_type": "list", "folder": null, "created": "2021-03-31T10:50:37+00:00", "updated": "2021-03-31T10:50:37+00:00", "person_count": 1}], "excluded_lists": [{"object": "list", "id": "Ukh37W", "name": "Unengaged (3 Months)", "list_type": "segment", "folder": null, "created": "2021-03-31T10:50:37+00:00", "updated": "2021-03-31T10:50:43+00:00", "person_count": 0}], "status": "sent", "status_id": 1, "status_label": "Sent", "sent_at": "2021-05-26T23:30:13+00:00", "send_time": "2021-05-26T23:30:00+00:00", "created": "2021-05-16T23:17:45+00:00", "updated": "2021-05-26T23:30:13+00:00", "num_recipients": 1, "campaign_type": "Batch", "is_segmented": true, "message_type": "email", "template_id": "VR2KEG"}, "emitted_at": 1663367156487} {"stream": "campaigns", "data": {"object": "campaign", "id": "T4hgvQ", "name": "Email Campaign 2021-05-12 16:45:46", "subject": "", "from_email": "integration-test@airbyte.io", "from_name": "Airbyte", "lists": [], "excluded_lists": [], "status": "draft", "status_id": 2, "status_label": "Draft", "sent_at": null, "send_time": null, "created": "2021-05-12T20:45:47+00:00", "updated": "2021-05-12T20:45:47+00:00", "num_recipients": 0, "campaign_type": "Regular", "is_segmented": false, "message_type": "email", "template_id": null}, "emitted_at": 1663367156491} -{"stream": "events", "data": {"object": "event", "id": "3qvdbYg3", "statistic_id": "VFFb4u", "timestamp": 
1621295008, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295008" }, "datetime": "2021-05-17 23:43:28+00:00", "uuid": "adc8d000-b769-11eb-8001-28a6687f81c3", "person": { "object": "person", "id": "01F5YBDQE9W7WDSH9KK398CAYX", "$address1": "", "$address2": "", "$city": "", "$country": "", "$latitude": "", "$longitude": "", "$region": "", "$zip": "", "$last_name": "", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.{seed}@airbyte.io", "$first_name": "", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.{seed}@airbyte.io", "first_name": "", "last_name": "", "created": "2021-05-17 23:43:50", "updated": "2021-05-17 23:43:50" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} -{"stream": "events", "data": {"object": "event", "id": "3qvdgpzF", "statistic_id": "VFFb4u", "timestamp": 1621295124, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295124" }, "datetime": "2021-05-17 23:45:24+00:00", "uuid": "f2ed0200-b769-11eb-8001-76152f6b1c82", "person": { "object": "person", "id": "01F5YBGKW1SQN453RM293PHH37", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", "$last_name": "Last Name 0", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.0@airbyte.io", "$first_name": "First Name 0", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.0@airbyte.io", "first_name": "First Name 0", "last_name": "Last Name 0", "created": "2021-05-17 23:45:24", "updated": "2021-05-17 23:45:25" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} -{"stream": "events", "data": {"object": "event", "id": "3qvdgr5Z", "statistic_id": "VFFb4u", "timestamp": 1621295124, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295124" }, "datetime": "2021-05-17 23:45:24+00:00", "uuid": "f2ed0200-b769-11eb-8001-b642ddab48ad", "person": { "object": "person", "id": "01F5YBGM7J4YD4P6EYK5Q87BG4", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", "$last_name": "Last Name 1", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.1@airbyte.io", "$first_name": "First Name 1", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.1@airbyte.io", "first_name": "First Name 1", "last_name": "Last Name 1", "created": "2021-05-17 23:45:25", "updated": "2021-05-17 23:45:26" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} -{"stream": "events", "data": {"object": "event", "id": "3qvdgBgK", "statistic_id": "VFFb4u", "timestamp": 1621295124, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295124" }, "datetime": "2021-05-17 23:45:24+00:00", "uuid": "f2ed0200-b769-11eb-8001-2006a2b2b6e7", "person": { "object": "person", "id": "01F5YBGMK62AJR0955G7NW6EP7", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", "$last_name": "Last Name 2", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.2@airbyte.io", "$first_name": "First Name 2", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.2@airbyte.io", "first_name": "First Name 2", "last_name": 
"Last Name 2", "created": "2021-05-17 23:45:25", "updated": "2021-05-17 23:45:38" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} -{"stream": "events", "data": {"object": "event", "id": "3qvdgs9P", "statistic_id": "VFFb4u", "timestamp": 1621295125, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295125" }, "datetime": "2021-05-17 23:45:25+00:00", "uuid": "f3859880-b769-11eb-8001-f6a061424b91", "person": { "object": "person", "id": "01F5YBGMK62AJR0955G7NW6EP7", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", "$last_name": "Last Name 2", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.2@airbyte.io", "$first_name": "First Name 2", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.2@airbyte.io", "first_name": "First Name 2", "last_name": "Last Name 2", "created": "2021-05-17 23:45:25", "updated": "2021-05-17 23:45:38" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} +{"stream": "events", "data": {"object": "event", "id": "3qvdbYg3", "statistic_id": "VFFb4u", "timestamp": 1621295008, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295008" }, "datetime": "2021-05-17T23:43:28+00:00", "uuid": "adc8d000-b769-11eb-8001-28a6687f81c3", "person": { "object": "person", "id": "01F5YBDQE9W7WDSH9KK398CAYX", "$address1": "", "$address2": "", "$city": "", "$country": "", "$latitude": "", "$longitude": "", "$region": "", "$zip": "", "$last_name": "", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.{seed}@airbyte.io", "$first_name": "", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.{seed}@airbyte.io", "first_name": "", "last_name": "", "created": "2021-05-17 23:43:50", "updated": "2021-05-17 23:43:50" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} +{"stream": "events", "data": {"object": "event", "id": "3qvdgpzF", "statistic_id": "VFFb4u", "timestamp": 1621295124, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295124" }, "datetime": "2021-05-17T23:45:24+00:00", "uuid": "f2ed0200-b769-11eb-8001-76152f6b1c82", "person": { "object": "person", "id": "01F5YBGKW1SQN453RM293PHH37", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", "$last_name": "Last Name 0", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.0@airbyte.io", "$first_name": "First Name 0", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.0@airbyte.io", "first_name": "First Name 0", "last_name": "Last Name 0", "created": "2021-05-17 23:45:24", "updated": "2021-05-17 23:45:25" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} +{"stream": "events", "data": {"object": "event", "id": "3qvdgr5Z", "statistic_id": "VFFb4u", "timestamp": 1621295124, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295124" }, "datetime": "2021-05-17T23:45:24+00:00", "uuid": "f2ed0200-b769-11eb-8001-b642ddab48ad", "person": { "object": "person", "id": "01F5YBGM7J4YD4P6EYK5Q87BG4", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", 
"$last_name": "Last Name 1", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.1@airbyte.io", "$first_name": "First Name 1", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.1@airbyte.io", "first_name": "First Name 1", "last_name": "Last Name 1", "created": "2021-05-17 23:45:25", "updated": "2021-05-17 23:45:26" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} +{"stream": "events", "data": {"object": "event", "id": "3qvdgBgK", "statistic_id": "VFFb4u", "timestamp": 1621295124, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295124" }, "datetime": "2021-05-17T23:45:24+00:00", "uuid": "f2ed0200-b769-11eb-8001-2006a2b2b6e7", "person": { "object": "person", "id": "01F5YBGMK62AJR0955G7NW6EP7", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", "$last_name": "Last Name 2", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.2@airbyte.io", "$first_name": "First Name 2", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.2@airbyte.io", "first_name": "First Name 2", "last_name": "Last Name 2", "created": "2021-05-17 23:45:25", "updated": "2021-05-17 23:45:38" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} +{"stream": "events", "data": {"object": "event", "id": "3qvdgs9P", "statistic_id": "VFFb4u", "timestamp": 1621295125, "event_name": "Clicked Email", "event_properties": { "$event_id": "1621295125" }, "datetime": "2021-05-17T23:45:25+00:00", "uuid": "f3859880-b769-11eb-8001-f6a061424b91", "person": { "object": "person", "id": "01F5YBGMK62AJR0955G7NW6EP7", "$address1": "", "$address2": "", "$city": "Springfield", "$country": "", "$latitude": "", "$longitude": "", "$region": "Illinois", "$zip": "", "$last_name": "Last Name 2", "$title": "", "$organization": "", "$phone_number": "", "$email": "some.email.that.dont.exist.2@airbyte.io", "$first_name": "First Name 2", "$timezone": "", "$id": "", "email": "some.email.that.dont.exist.2@airbyte.io", "first_name": "First Name 2", "last_name": "Last Name 2", "created": "2021-05-17 23:45:25", "updated": "2021-05-17 23:45:38" }, "flow_id": null, "flow_message_id": null, "campaign_id": null }, "emitted_at": 1663367160652} {"stream": "global_exclusions", "data": {"object": "exclusion", "email": "some.email.that.dont.exist.9@airbyte.io", "reason": "manually_excluded", "timestamp": "2021-05-18T01:20:01+00:00"}, "emitted_at": 1663367161413} {"stream": "global_exclusions", "data": {"object": "exclusion", "email": "some.email.that.dont.exist.8@airbyte.io", "reason": "manually_excluded", "timestamp": "2021-05-18T01:29:51+00:00"}, "emitted_at": 1663367161413} {"stream": "lists", "data": {"object": "list", "id": "RnsiHB", "name": "Newsletter", "list_type": "list", "folder": null, "created": "2021-03-31T10:50:36+00:00", "updated": "2021-03-31T10:50:36+00:00", "person_count": 1}, "emitted_at": 1663367161878} diff --git a/airbyte-integrations/connectors/source-klaviyo/metadata.yaml b/airbyte-integrations/connectors/source-klaviyo/metadata.yaml index 6610a41bf7cf..844e4840d221 100644 --- a/airbyte-integrations/connectors/source-klaviyo/metadata.yaml +++ b/airbyte-integrations/connectors/source-klaviyo/metadata.yaml @@ -6,7 +6,9 @@ data: connectorSubtype: api connectorType: source definitionId: 95e8cffd-b8c4-4039-968e-d32fb4a69bde - dockerImageTag: 
0.4.0 + connectorBuildOptions: + baseImage: docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c + dockerImageTag: 1.0.0 dockerRepository: airbyte/source-klaviyo githubIssueLabel: source-klaviyo icon: klaviyo.svg @@ -18,6 +20,11 @@ data: oss: enabled: true releaseStage: generally_available + releases: + breakingChanges: + 1.0.0: + message: In this release, for 'events' stream changed type of 'event_properties/items/quantity' field from integer to number. Users will need to refresh the source schema and reset events streams after upgrading. + upgradeDeadline: "2023-11-30" documentationUrl: https://docs.airbyte.com/integrations/sources/klaviyo tags: - language:python diff --git a/airbyte-integrations/connectors/source-klaviyo/setup.py b/airbyte-integrations/connectors/source-klaviyo/setup.py index 3603d9d4885b..32a31edb0848 100644 --- a/airbyte-integrations/connectors/source-klaviyo/setup.py +++ b/airbyte-integrations/connectors/source-klaviyo/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.1"] +MAIN_REQUIREMENTS = ["airbyte-cdk"] TEST_REQUIREMENTS = ["requests-mock~=1.9.3", "pytest~=6.1", "pytest-mock", "requests_mock~=1.8"] diff --git a/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/schemas/events.json b/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/schemas/events.json index 7cf830f697a8..d8adc26d6f39 100644 --- a/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/schemas/events.json +++ b/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/schemas/events.json @@ -6,7 +6,7 @@ "uuid": { "type": "string" }, "event_name": { "type": "string" }, "timestamp": { "type": "integer" }, - "datetime": { "type": "string" }, + "datetime": { "type": "string", "format": "date-time" }, "statistic_id": { "type": "string" }, "event_properties": { "type": "object", @@ -21,11 +21,23 @@ "name": { "type": "string" }, "sku": { "type": "string" }, "price": { "type": "number" }, - "quantity": { "type": "integer" } + "quantity": { "type": "number" } } } } - } + }, + "$event_id": { "type": ["null", "string"] }, + "$flow ": { "type": ["null", "string"] }, + "$message": { "type": ["null", "string"] }, + "Campaign Name": { "type": ["null", "string"] }, + "Client Canonical": { "type": ["null", "string"] }, + "Client Name": { "type": ["null", "string"] }, + "Client OS": { "type": ["null", "string"] }, + "Client OS Family": { "type": ["null", "string"] }, + "Client Type": { "type": ["null", "string"] }, + "Email Domain": { "type": ["null", "string"] }, + "Subject": { "type": ["null", "string"] }, + "URL": { "type": ["null", "string"] } }, "person": { "type": "object", diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/Dockerfile b/airbyte-integrations/connectors/source-open-exchange-rates/Dockerfile index c21604d3beef..fe66fbab6753 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/Dockerfile +++ b/airbyte-integrations/connectors/source-open-exchange-rates/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9.13-alpine3.15 as base +FROM python:3.9.11-alpine3.15 as base # build and load all requirements FROM base as builder @@ -34,5 +34,5 @@ COPY source_open_exchange_rates ./source_open_exchange_rates ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.2.0 LABEL 
io.airbyte.name=airbyte/source-open-exchange-rates diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/README.md b/airbyte-integrations/connectors/source-open-exchange-rates/README.md index 964528311341..9bd277498329 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/README.md +++ b/airbyte-integrations/connectors/source-open-exchange-rates/README.md @@ -1,35 +1,10 @@ # Open Exchange Rates Source -This is the repository for the Open Exchange Rates source connector, written in Python. -For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/open-exchange-rates). +This is the repository for the Open Exchange Rates configuration based source connector. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/open-exchange-rates). ## Local development -### Prerequisites -**To iterate on this connector, make sure to complete this prerequisites section.** - -#### Minimum Python version required `= 3.9.0` - -#### Build & Activate Virtual Environment and install dependencies -From this connector directory, create a virtual environment: -``` -python -m venv .venv -``` - -This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your -development environment of choice. To activate it from the terminal, run: -``` -source .venv/bin/activate -pip install -r requirements.txt -pip install '.[tests]' -``` -If you are in an IDE, follow your IDE's instructions to activate the virtualenv. - -Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is -used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. -If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything -should work as you expect. - #### Building via Gradle You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. @@ -39,7 +14,7 @@ To build using Gradle, from the Airbyte repository root, run: ``` #### Create credentials -**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/open-exchange-rates) +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/open-exchange-rates) to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_open_exchange_rates/spec.yaml` file. Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. See `integration_tests/sample_config.json` for a sample config file. @@ -47,14 +22,6 @@ See `integration_tests/sample_config.json` for a sample config file. **If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source open-exchange-rates test creds` and place them into `secrets/config.json`. 
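For reference, the config only needs the fields referenced by the new `manifest.yaml` and the removed unit-test fixture (`app_id`, `base`, `start_date`). A minimal sketch with placeholder values, not an official sample:

```json
{
  "app_id": "<your Open Exchange Rates App ID>",
  "base": "USD",
  "start_date": "2022-11-13"
}
```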
-### Locally running the connector -``` -python main.py spec -python main.py check --config secrets/config.json -python main.py discover --config secrets/config.json -python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json -``` - ### Locally running the connector docker image #### Build @@ -79,32 +46,15 @@ docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-open-exchange-rates:de docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-open-exchange-rates:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json ``` ## Testing -Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. -First install test dependencies into your virtual environment: -``` -pip install .[tests] -``` -### Unit Tests -To run unit tests locally, from the connector directory run: -``` -python -m pytest unit_tests -``` -### Integration Tests -There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). -#### Custom Integration tests -Place custom tests inside `integration_tests/` folder, then, from the connector root, run -``` -python -m pytest integration_tests -``` #### Acceptance Tests -Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. -To run your integration tests with acceptance tests, from the connector root, run + +To run your integration tests with Docker, run: ``` -python -m pytest integration_tests -p integration_tests.acceptance +./acceptance-test-docker.sh ``` -To run your integration tests with docker ### Using gradle to run tests All commands should be run from airbyte project root. diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/__init__.py b/airbyte-integrations/connectors/source-open-exchange-rates/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-open-exchange-rates/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-config.yml b/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-config.yml index 9135f16903f5..483fe518fa28 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-config.yml @@ -1,5 +1,7 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests connector_image: airbyte/source-open-exchange-rates:dev - +test_strictness_level: low acceptance_tests: spec: tests: @@ -18,9 +20,19 @@ acceptance_tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" empty_streams: [] + # expect_records: + # path: "integration_tests/expected_records.jsonl" + # extra_fields: no + # exact_order: no + # extra_records: yes incremental: + # bypass_reason: "This connector does not implement incremental sync" tests: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" future_state: future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-docker.sh index c51577d10690..b6d65deeccb4 100755 --- a/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-docker.sh +++ b/airbyte-integrations/connectors/source-open-exchange-rates/acceptance-test-docker.sh @@ -1,16 +1,3 @@ #!/usr/bin/env sh -# Build latest connector image -docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) - -# Pull latest acctest image -docker pull airbyte/source-acceptance-test:latest - -# Run -docker run --rm -it \ - -v /var/run/docker.sock:/var/run/docker.sock \ - -v /tmp:/tmp \ - -v $(pwd):/test_input \ - airbyte/source-acceptance-test \ - --acceptance-test-config /test_input - +source "$(git rev-parse --show-toplevel)/airbyte-integrations/bases/connector-acceptance-test/acceptance-test-docker.sh" diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/__init__.py b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/__init__.py index 1100c1c58cf5..c941b3045795 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/__init__.py +++ b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/__init__.py @@ -1,3 +1,3 @@ # -# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/abnormal_state.json index 8f84fbf71fca..8d8a421ddc4b 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/abnormal_state.json @@ -1,5 +1,9 @@ -{ - "open_exchange_rates": { - "timestamp": 2052084644 +[ + { + "type": "STREAM", + "stream": { + "stream_state": { "timestamp": 3052084644 }, + "stream_descriptor": { "name": "open_exchange_rates" } + } } -} +] diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/acceptance.py index d49b55882333..9e6409236281 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/acceptance.py +++ b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/acceptance.py @@ -10,4 +10,7 @@ @pytest.fixture(scope="session", autouse=True) def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. otherwise remove the TODO comments yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/configured_catalog.json index 5bc2570a65a6..40c44b9de3be 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/configured_catalog.json @@ -3,539 +3,14 @@ { "stream": { "name": "open_exchange_rates", - "json_schema": { - "type": "object", - "properties": { - "base": { - "type": "string" - }, - "timestamp": { - "type": "integer" - }, - "rates": { - "type": "object", - "properties": { - "AED": { - "type": ["null", "number"] - }, - "AFN": { - "type": ["null", "number"] - }, - "ALL": { - "type": ["null", "number"] - }, - "AMD": { - "type": ["null", "number"] - }, - "ANG": { - "type": ["null", "number"] - }, - "AOA": { - "type": ["null", "number"] - }, - "ARS": { - "type": ["null", "number"] - }, - "AUD": { - "type": ["null", "number"] - }, - "AWG": { - "type": ["null", "number"] - }, - "AZN": { - "type": ["null", "number"] - }, - "BAM": { - "type": ["null", "number"] - }, - "BBD": { - "type": ["null", "number"] - }, - "BDT": { - "type": ["null", "number"] - }, - "BGN": { - "type": ["null", "number"] - }, - "BHD": { - "type": ["null", "number"] - }, - "BIF": { - "type": ["null", "number"] - }, - "BMD": { - "type": ["null", "number"] - }, - "BND": { - "type": ["null", "number"] - }, - "BOB": { - "type": ["null", "number"] - }, - "BRL": { - "type": ["null", "number"] - }, - "BSD": { - "type": ["null", "number"] - }, - "BTC": { - "type": ["null", "number"] - }, - "BTN": { - "type": ["null", "number"] - }, - "BWP": { - "type": ["null", "number"] - }, - "BYN": { - "type": ["null", "number"] - }, - "BZD": { - "type": ["null", "number"] - }, - "CAD": { - "type": ["null", "number"] - }, - "CDF": { - "type": ["null", "number"] - }, - "CHF": { - "type": ["null", "number"] - }, - "CLF": { - "type": ["null", "number"] - }, - "CLP": { - "type": ["null", "number"] - }, - "CNH": { - 
"type": ["null", "number"] - }, - "CNY": { - "type": ["null", "number"] - }, - "COP": { - "type": ["null", "number"] - }, - "CRC": { - "type": ["null", "number"] - }, - "CUC": { - "type": ["null", "number"] - }, - "CUP": { - "type": ["null", "number"] - }, - "CVE": { - "type": ["null", "number"] - }, - "CZK": { - "type": ["null", "number"] - }, - "DJF": { - "type": ["null", "number"] - }, - "DKK": { - "type": ["null", "number"] - }, - "DOP": { - "type": ["null", "number"] - }, - "DZD": { - "type": ["null", "number"] - }, - "EGP": { - "type": ["null", "number"] - }, - "ERN": { - "type": ["null", "number"] - }, - "ETB": { - "type": ["null", "number"] - }, - "EUR": { - "type": ["null", "number"] - }, - "FJD": { - "type": ["null", "number"] - }, - "FKP": { - "type": ["null", "number"] - }, - "GBP": { - "type": ["null", "number"] - }, - "GEL": { - "type": ["null", "number"] - }, - "GGP": { - "type": ["null", "number"] - }, - "GHS": { - "type": ["null", "number"] - }, - "GIP": { - "type": ["null", "number"] - }, - "GMD": { - "type": ["null", "number"] - }, - "GNF": { - "type": ["null", "number"] - }, - "GTQ": { - "type": ["null", "number"] - }, - "GYD": { - "type": ["null", "number"] - }, - "HKD": { - "type": ["null", "number"] - }, - "HNL": { - "type": ["null", "number"] - }, - "HRK": { - "type": ["null", "number"] - }, - "HTG": { - "type": ["null", "number"] - }, - "HUF": { - "type": ["null", "number"] - }, - "IDR": { - "type": ["null", "number"] - }, - "ILS": { - "type": ["null", "number"] - }, - "IMP": { - "type": ["null", "number"] - }, - "INR": { - "type": ["null", "number"] - }, - "IQD": { - "type": ["null", "number"] - }, - "IRR": { - "type": ["null", "number"] - }, - "ISK": { - "type": ["null", "number"] - }, - "JEP": { - "type": ["null", "number"] - }, - "JMD": { - "type": ["null", "number"] - }, - "JOD": { - "type": ["null", "number"] - }, - "JPY": { - "type": ["null", "number"] - }, - "KES": { - "type": ["null", "number"] - }, - "KGS": { - "type": ["null", "number"] - }, - "KHR": { - "type": ["null", "number"] - }, - "KMF": { - "type": ["null", "number"] - }, - "KPW": { - "type": ["null", "number"] - }, - "KRW": { - "type": ["null", "number"] - }, - "KWD": { - "type": ["null", "number"] - }, - "KYD": { - "type": ["null", "number"] - }, - "KZT": { - "type": ["null", "number"] - }, - "LAK": { - "type": ["null", "number"] - }, - "LBP": { - "type": ["null", "number"] - }, - "LKR": { - "type": ["null", "number"] - }, - "LRD": { - "type": ["null", "number"] - }, - "LSL": { - "type": ["null", "number"] - }, - "LYD": { - "type": ["null", "number"] - }, - "MAD": { - "type": ["null", "number"] - }, - "MDL": { - "type": ["null", "number"] - }, - "MGA": { - "type": ["null", "number"] - }, - "MKD": { - "type": ["null", "number"] - }, - "MMK": { - "type": ["null", "number"] - }, - "MNT": { - "type": ["null", "number"] - }, - "MOP": { - "type": ["null", "number"] - }, - "MRO": { - "type": ["null", "number"] - }, - "MRU": { - "type": ["null", "number"] - }, - "MUR": { - "type": ["null", "number"] - }, - "MVR": { - "type": ["null", "number"] - }, - "MWK": { - "type": ["null", "number"] - }, - "MXN": { - "type": ["null", "number"] - }, - "MYR": { - "type": ["null", "number"] - }, - "MZN": { - "type": ["null", "number"] - }, - "NAD": { - "type": ["null", "number"] - }, - "NGN": { - "type": ["null", "number"] - }, - "NIO": { - "type": ["null", "number"] - }, - "NOK": { - "type": ["null", "number"] - }, - "NPR": { - "type": ["null", "number"] - }, - "NZD": { - "type": ["null", "number"] - }, - "OMR": { - 
"type": ["null", "number"] - }, - "PAB": { - "type": ["null", "number"] - }, - "PEN": { - "type": ["null", "number"] - }, - "PGK": { - "type": ["null", "number"] - }, - "PHP": { - "type": ["null", "number"] - }, - "PKR": { - "type": ["null", "number"] - }, - "PLN": { - "type": ["null", "number"] - }, - "PYG": { - "type": ["null", "number"] - }, - "QAR": { - "type": ["null", "number"] - }, - "RON": { - "type": ["null", "number"] - }, - "RSD": { - "type": ["null", "number"] - }, - "RUB": { - "type": ["null", "number"] - }, - "RWF": { - "type": ["null", "number"] - }, - "SAR": { - "type": ["null", "number"] - }, - "SBD": { - "type": ["null", "number"] - }, - "SCR": { - "type": ["null", "number"] - }, - "SDG": { - "type": ["null", "number"] - }, - "SEK": { - "type": ["null", "number"] - }, - "SGD": { - "type": ["null", "number"] - }, - "SHP": { - "type": ["null", "number"] - }, - "SLL": { - "type": ["null", "number"] - }, - "SOS": { - "type": ["null", "number"] - }, - "SRD": { - "type": ["null", "number"] - }, - "SSP": { - "type": ["null", "number"] - }, - "STD": { - "type": ["null", "number"] - }, - "STN": { - "type": ["null", "number"] - }, - "SVC": { - "type": ["null", "number"] - }, - "SYP": { - "type": ["null", "number"] - }, - "SZL": { - "type": ["null", "number"] - }, - "THB": { - "type": ["null", "number"] - }, - "TJS": { - "type": ["null", "number"] - }, - "TMT": { - "type": ["null", "number"] - }, - "TND": { - "type": ["null", "number"] - }, - "TOP": { - "type": ["null", "number"] - }, - "TRY": { - "type": ["null", "number"] - }, - "TTD": { - "type": ["null", "number"] - }, - "TWD": { - "type": ["null", "number"] - }, - "TZS": { - "type": ["null", "number"] - }, - "UAH": { - "type": ["null", "number"] - }, - "UGX": { - "type": ["null", "number"] - }, - "USD": { - "type": ["null", "number"] - }, - "UYU": { - "type": ["null", "number"] - }, - "UZS": { - "type": ["null", "number"] - }, - "VES": { - "type": ["null", "number"] - }, - "VND": { - "type": ["null", "number"] - }, - "VUV": { - "type": ["null", "number"] - }, - "WST": { - "type": ["null", "number"] - }, - "XAF": { - "type": ["null", "number"] - }, - "XAG": { - "type": ["null", "number"] - }, - "XAU": { - "type": ["null", "number"] - }, - "XCD": { - "type": ["null", "number"] - }, - "XDR": { - "type": ["null", "number"] - }, - "XOF": { - "type": ["null", "number"] - }, - "XPD": { - "type": ["null", "number"] - }, - "XPF": { - "type": ["null", "number"] - }, - "XPT": { - "type": ["null", "number"] - }, - "YER": { - "type": ["null", "number"] - }, - "ZAR": { - "type": ["null", "number"] - }, - "ZMW": { - "type": ["null", "number"] - }, - "ZWL": { - "type": ["null", "number"] - } - } - } - } - }, + "json_schema": {}, "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": ["timestamp"] + "default_cursor_field": ["timestamp"], + "source_defined_primary_key": [["timestamp"]] }, "sync_mode": "incremental", - "destination_sync_mode": "append", - "cursor_field": ["timestamp"] + "destination_sync_mode": "append" } ] } diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/expected_records.jsonl new file mode 100644 index 000000000000..d0778e83e52d --- /dev/null +++ b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/expected_records.jsonl @@ -0,0 +1 @@ +{"stream": "open_exchange_rates", "data": {"disclaimer":"Usage 
subject to terms: https://openexchangerates.org/terms","license":"https://openexchangerates.org/license","timestamp":1690934399,"base":"USD","rates":{"AED":3.673075,"AFN":86.632624,"ALL":92.945891,"AMD":387.19215,"ANG":1.80297,"AOA":826.5905,"ARS":276.186305,"AUD":1.511123,"AWG":1.8,"AZN":1.7,"BAM":1.781604,"BBD":2,"BDT":108.574165,"BGN":1.77719,"BHD":0.376993,"BIF":2833.020268,"BMD":1,"BND":1.334044,"BOB":6.91298,"BRL":4.7935,"BSD":1,"BTC":3.3707219e-05,"BTN":82.300254,"BWP":13.215643,"BYN":2.525084,"BZD":2.016506,"CAD":1.32715,"CDF":2480.983028,"CHF":0.872626,"CLF":0.030502,"CLP":841.63,"CNH":7.176505,"CNY":7.1775,"COP":3915.417839,"CRC":545.006601,"CUC":1,"CUP":25.75,"CVE":100.606441,"CZK":21.7611,"DJF":178.072377,"DKK":6.7696,"DOP":56.285713,"DZD":135.64355,"EGP":30.9068,"ERN":15,"ETB":54.978478,"EUR":0.908455,"FJD":2.2183,"FKP":0.78154,"GBP":0.78154,"GEL":2.6,"GGP":0.78154,"GHS":11.333518,"GIP":0.78154,"GMD":60.375,"GNF":8625.954835,"GTQ":7.865732,"GYD":209.302328,"HKD":7.794182,"HNL":24.626137,"HRK":6.843146,"HTG":136.558033,"HUF":353.275548,"IDR":15139.102435,"ILS":3.63639,"IMP":0.78154,"INR":82.301851,"IQD":1310.335587,"IRR":42312.5,"ISK":131.09,"JEP":0.78154,"JMD":154.554565,"JOD":0.7083,"JPY":142.963,"KES":142.43631,"KGS":87.7821,"KHR":4131.520737,"KMF":448.30009,"KPW":900,"KRW":1289.399536,"KWD":0.307512,"KYD":0.833675,"KZT":444.670939,"LAK":19243.55048,"LBP":15109.691214,"LKR":319.129179,"LRD":186.499986,"LSL":18.016579,"LYD":4.782894,"MAD":9.857715,"MDL":17.749473,"MGA":4493.058467,"MKD":55.948277,"MMK":2100.856021,"MNT":3450,"MOP":8.030971,"MRU":38.104804,"MUR":45.2,"MVR":15.36,"MWK":1053.305792,"MXN":16.8783,"MYR":4.522,"MZN":63.764993,"NAD":18.28,"NGN":758.52,"NIO":36.568285,"NOK":10.168205,"NPR":131.678783,"NZD":1.62881,"OMR":0.384984,"PAB":1,"PEN":3.606245,"PGK":3.589272,"PHP":54.901495,"PKR":287.461749,"PLN":4.04257,"PYG":7277.293735,"QAR":3.641,"RON":4.4809,"RSD":106.556,"RUB":92.410004,"RWF":1176.872143,"SAR":3.751941,"SBD":8.368787,"SCR":13.151193,"SDG":601.589,"SEK":9.64075,"SGD":1.338502,"SHP":0.78154,"SLL":10260,"SOS":578.5,"SRD":20.9025,"SSP":130.2634,"STD":21050.599925,"SVC":8.747441,"SYP":512.380676,"SZL":18.009,"THB":32.4,"TJS":11.404289,"TMT":3.499986,"TND":2.487649,"TOP":2.272714,"TRY":9.319,"TTD":6.785232,"TWD":27.9,"TZS":2316.200319,"UAH":27.220516,"UGX":3530.977159,"UYU":43.788437,"UZS":10520.400572,"VES":3547812.5,"VND":22724.990112,"VUV":112.573844,"WST":2.515515,"XAF":554.633237,"XAG":0.034503,"XAU":0.000451,"XCD":2.7,"XDR":0.70168,"XOF":554.633237,"XPD":0.000406,"XPF":98.090344,"XPT":0.000824,"YER":250.350154,"ZAR":18.017,"ZMW":15.65,"ZWL":322.355011}}, "emitted_at": 1696204799} diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/sample_state.json index c6e267ec3977..631373b6310e 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/sample_state.json +++ b/airbyte-integrations/connectors/source-open-exchange-rates/integration_tests/sample_state.json @@ -1,5 +1,9 @@ -{ - "exchange_rates": { - "timestamp": 1673379575 +[ + { + "type": "STREAM", + "stream": { + "stream_state": { "timestamp": 1673379575 }, + "stream_descriptor": { "name": "open_exchange_rates" } + } } -} +] diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml b/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml index adc7e74a580d..f93f96677127 100644 --- 
a/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml +++ b/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml @@ -1,24 +1,25 @@ data: + allowedHosts: + hosts: + - openexchangerates.org + registries: + oss: + enabled: true + cloud: + enabled: false connectorSubtype: api connectorType: source definitionId: 77d5ca6b-d345-4dce-ba1e-1935a75778b8 - dockerImageTag: 0.1.0 + dockerImageTag: 0.2.0 dockerRepository: airbyte/source-open-exchange-rates githubIssueLabel: source-open-exchange-rates - icon: airbyte.svg + icon: open-exchange-rates.svg license: MIT name: Open Exchange Rates - registries: - cloud: - enabled: false - oss: - enabled: true + releaseDate: 2023-10-02 releaseStage: alpha + supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/open-exchange-rates tags: - - language:python - ab_internal: - sl: 100 - ql: 100 - supportLevel: community + - language:lowcode metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/requirements.txt b/airbyte-integrations/connectors/source-open-exchange-rates/requirements.txt index d6e1198b1ab1..cc57334ef619 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/requirements.txt +++ b/airbyte-integrations/connectors/source-open-exchange-rates/requirements.txt @@ -1 +1,2 @@ +-e ../../bases/connector-acceptance-test -e . diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/setup.py b/airbyte-integrations/connectors/source-open-exchange-rates/setup.py index 8e52d07a0702..55fd4589213f 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/setup.py +++ b/airbyte-integrations/connectors/source-open-exchange-rates/setup.py @@ -6,13 +6,12 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk~=0.2", - "pendulum==2.1.2", + "airbyte-cdk", ] TEST_REQUIREMENTS = [ "requests-mock~=1.9.3", - "pytest~=6.1", + "pytest~=6.2", "pytest-mock~=3.6.1", ] diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/__init__.py b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/__init__.py index 71a500afcdab..0d1770484ee9 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/__init__.py +++ b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/manifest.yaml b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/manifest.yaml new file mode 100644 index 000000000000..5f8f7952682a --- /dev/null +++ b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/manifest.yaml @@ -0,0 +1,57 @@ +version: "0.29.0" + +definitions: + selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: [] + + requester: + type: HttpRequester + url_base: "https://openexchangerates.org/api/" + http_method: "GET" + authenticator: + type: NoAuth + request_parameters: + app_id: "{{ config['app_id'] }}" + base: "{{ config['base'] }}" + + retriever: + type: SimpleRetriever + record_selector: + $ref: "#/definitions/selector" + paginator: + type: NoPagination + requester: + $ref: "#/definitions/requester" + + base_stream: + type: DeclarativeStream + retriever: + $ref: "#/definitions/retriever" + + incremental_sync_base: + type: DatetimeBasedCursor + cursor_field: "{{ parameters.incremental_cursor }}" + datetime_format: "%s" + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + + open_exchange_rates_stream: + $ref: "#/definitions/base_stream" + $parameters: + name: "open_exchange_rates" + incremental_cursor: "timestamp" + path: "historical/{{ format_datetime( config['start_date'] if not stream_state else stream_state['timestamp'], '%Y-%m-%d' ) }}.json" + incremental_sync: + $ref: "#/definitions/incremental_sync_base" + +streams: + - "#/definitions/open_exchange_rates_stream" + +check: + type: CheckStream + stream_names: + - "open_exchange_rates" diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/schemas/open_exchange_rates.json b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/schemas/open_exchange_rates.json index 92c17f7e56aa..fae8408b06a0 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/schemas/open_exchange_rates.json +++ b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/schemas/open_exchange_rates.json @@ -1,15 +1,23 @@ { - "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Generated schema for Root", + "type": ["null", "object"], "required": ["base", "rates"], "properties": { "base": { - "type": "string" + "type": ["null", "string"] + }, + "disclaimer": { + "type": ["null", "string"] + }, + "license": { + "type": ["null", "string"] }, "timestamp": { - "type": "integer" + "type": ["null", "integer"] }, "rates": { - "type": "object", + "type": ["null", "object"], "properties": { "AED": { "type": ["null", "number"] diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/source.py b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/source.py index 2aed45b828bb..b0ea9f7fff6e 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/source.py +++ b/airbyte-integrations/connectors/source-open-exchange-rates/source_open_exchange_rates/source.py @@ -2,140 +2,17 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource -from abc import ABC -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. -import pendulum -import requests -from airbyte_cdk.sources import AbstractSource -from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.http import HttpStream -from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator -from pendulum import DateTime +WARNING: Do not modify this file. +""" -class OpenExchangeRates(HttpStream, ABC): - url_base = "https://openexchangerates.org/api/" - - primary_key = None - cursor_field = "timestamp" - - def __init__(self, base: Optional[str], start_date: str, app_id: str, **kwargs: dict) -> None: - super().__init__(**kwargs) - - self.base = base - self.start_date = pendulum.parse(start_date) - self.app_id = app_id - self._cursor_value = None - - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: - return None - - def request_params( - self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None - ) -> MutableMapping[str, Any]: - - params = {} - - if self.base is not None: - params["base"] = self.base - - return params - - @property - def state(self) -> Mapping[str, Any]: - if self._cursor_value: - return {self.cursor_field: self._cursor_value} - else: - return {self.cursor_field: self.start_date.timestamp()} - - @state.setter - def state(self, value: Mapping[str, Any]): - self._cursor_value = value[self.cursor_field] - - def parse_response( - self, - response: requests.Response, - *, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, - ) -> Iterable[Mapping]: - response_json = response.json() - - latest_record_timestamp = response_json["timestamp"] - if self._cursor_value and latest_record_timestamp <= self._cursor_value: - return - if self._cursor_value: - self._cursor_value = max(self._cursor_value, latest_record_timestamp) - else: - self._cursor_value = latest_record_timestamp - - yield response_json - - def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: - start_date = stream_state[self.cursor_field] if stream_state and self.cursor_field in stream_state else self.start_date - - if isinstance(start_date, int): - start_date = pendulum.from_timestamp(start_date) - - return self._chunk_date_range(start_date) - - def path( - self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None - ) -> str: - return f"historical/{stream_slice['date']}.json" - - def _chunk_date_range(self, start_date: DateTime) -> List[Mapping[str, Any]]: - """ - Returns a list of each day between the start date and now. - The return value is a list of dicts {'date': date_string}. 
- """ - dates = [] - - while start_date < pendulum.now(): - dates.append({"date": start_date.to_date_string()}) - start_date = start_date.add(days=1) - return dates - - -# Source -class SourceOpenExchangeRates(AbstractSource): - def check_connection(self, logger, config) -> Tuple[bool, any]: - """ - Checks the connection by sending a request to /usage and checks the remaining quota - - :param config: the user-input config object conforming to the connector's spec.yaml - :param logger: logger object - :return Tuple[bool, any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise. - """ - try: - auth = TokenAuthenticator(token=config["app_id"], auth_method="Token").get_auth_header() - - resp = requests.get(f"{OpenExchangeRates.url_base}usage.json", headers=auth) - status = resp.status_code - - logger.info(f"Ping response code: {status}") - response_dict = resp.json() - - if status == 200: - quota_remaining = response_dict["data"]["usage"]["requests_remaining"] - - if quota_remaining > 0: - return True, None - - return False, "Quota exceeded" - else: - description = response_dict.get("description") - return False, description - except Exception as e: - return False, e - - def streams(self, config: Mapping[str, Any]) -> List[Stream]: - """ - :param config: A Mapping of the user input configuration as defined in the connector spec. - """ - auth = TokenAuthenticator(token=config["app_id"], auth_method="Token") - - return [OpenExchangeRates(base=config["base"], start_date=config["start_date"], app_id=config["app_id"], authenticator=auth)] +# Declarative Source +class SourceOpenExchangeRates(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "manifest.yaml"}) diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/__init__.py b/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/__init__.py deleted file mode 100644 index 1100c1c58cf5..000000000000 --- a/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# -# Copyright (c) 2022 Airbyte, Inc., all rights reserved. -# diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/conftest.py b/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/conftest.py deleted file mode 100644 index aa7b8cdb385c..000000000000 --- a/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/conftest.py +++ /dev/null @@ -1,25 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -from pytest import fixture - - -@fixture(name="config") -def config_fixture(requests_mock): - config = {"start_date": "2022-11-13", "base": "USD", "app_id": "KEY"} - - return config - - -@fixture(name="mock_stream") -def mock_stream_fixture(requests_mock): - def _mock_stream(path, response=None, status_code=200): - if response is None: - response = {} - - url = f"https://openexchangerates.org/api/{path}.json" - requests_mock.get(url, json=response, status_code=status_code) - - return _mock_stream diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/test_source.py b/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/test_source.py deleted file mode 100644 index 9610e3083a9f..000000000000 --- a/airbyte-integrations/connectors/source-open-exchange-rates/unit_tests/test_source.py +++ /dev/null @@ -1,102 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
-# - -import logging - -from source_open_exchange_rates.source import SourceOpenExchangeRates - -logger = logging.getLogger("airbyte") - - -def test_check_connection(config, mock_stream): - response = { - "status": 200, - "data": { - "app_id": "KEY", - "status": "active", - "plan": { - "name": "Free", - "quota": "1000 requests / month", - "update_frequency": "3600s", - "features": { - "base": False, - "symbols": False, - "experimental": True, - "time-series": False, - "convert": False, - "bid-ask": False, - "ohlc": False, - "spot": False, - }, - }, - "usage": { - "requests": 27, - "requests_quota": 1000, - "requests_remaining": 973, - "days_elapsed": 1, - "days_remaining": 29, - "daily_average": 27, - }, - }, - } - - mock_stream(path="usage", response=response) - ok, error_msg = SourceOpenExchangeRates().check_connection(logger, config=config) - logger.info(error_msg) - assert ok - assert error_msg is None - - -def test_check_connection_quota_exceeded_exception(config, mock_stream): - response = { - "status": 200, - "data": { - "app_id": "KEY", - "status": "active", - "plan": { - "name": "Free", - "quota": "1000 requests / month", - "update_frequency": "3600s", - "features": { - "base": False, - "symbols": False, - "experimental": True, - "time-series": False, - "convert": False, - "bid-ask": False, - "ohlc": False, - "spot": False, - }, - }, - "usage": { - "requests": 1000, - "requests_quota": 1000, - "requests_remaining": 0, - "days_elapsed": 1, - "days_remaining": 29, - "daily_average": 27, - }, - }, - } - - mock_stream(path="usage", response=response, status_code=200) - ok, error_msg = SourceOpenExchangeRates().check_connection(logger, config=config) - - assert not ok - assert error_msg == "Quota exceeded" - - -def test_check_connection_invalid_appid_exception(config, mock_stream): - response = { - "error": True, - "status": 401, - "message": "invalid_app_id", - "description": "Invalid App ID - please sign up at https://openexchangerates.org/signup, or contact support@openexchangerates.org.", - } - - mock_stream(path="usage", response=response, status_code=401) - ok, error_msg = SourceOpenExchangeRates().check_connection(logger, config=config) - - assert not ok - assert error_msg == response["description"] diff --git a/airbyte-integrations/connectors/source-orbit/Dockerfile b/airbyte-integrations/connectors/source-orbit/Dockerfile index d20a9c53aa90..1a13e320be9c 100644 --- a/airbyte-integrations/connectors/source-orbit/Dockerfile +++ b/airbyte-integrations/connectors/source-orbit/Dockerfile @@ -34,5 +34,5 @@ COPY source_orbit ./source_orbit ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.2.0 LABEL io.airbyte.name=airbyte/source-orbit diff --git a/airbyte-integrations/connectors/source-orbit/acceptance-test-config.yml b/airbyte-integrations/connectors/source-orbit/acceptance-test-config.yml index d49a28f248ac..a9cca89ed872 100644 --- a/airbyte-integrations/connectors/source-orbit/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-orbit/acceptance-test-config.yml @@ -1,6 +1,7 @@ # See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) # for more information about how to configure these tests connector_image: airbyte/source-orbit:dev +test_strictness_level: low tests: spec: - spec_path: "source_orbit/spec.yaml" diff --git 
a/airbyte-integrations/connectors/source-orbit/metadata.yaml b/airbyte-integrations/connectors/source-orbit/metadata.yaml index 60494a01a6e3..95c1d5b7450a 100644 --- a/airbyte-integrations/connectors/source-orbit/metadata.yaml +++ b/airbyte-integrations/connectors/source-orbit/metadata.yaml @@ -2,7 +2,7 @@ data: connectorSubtype: api connectorType: source definitionId: 95bcc041-1d1a-4c2e-8802-0ca5b1bfa36a - dockerImageTag: 0.1.1 + dockerImageTag: 0.2.0 dockerRepository: airbyte/source-orbit githubIssueLabel: source-orbit icon: orbit.svg diff --git a/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/members.json b/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/members.json index eac6de53806e..f4e0f9e5f07e 100644 --- a/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/members.json +++ b/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/members.json @@ -1,24 +1,26 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { - "type": "string" + "type": ["null", "string"] }, "fake": { - "type": "string" + "type": ["null", "string"] }, "type": { - "type": "string" + "type": ["null", "string"] }, "attributes": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "activities_count": { - "type": "integer" + "type": ["null", "integer"] }, "activities_score": { - "type": "integer" + "type": ["null", "number"] }, "avatar_url": { "type": ["null", "string"] @@ -71,13 +73,13 @@ "tag_list": { "type": ["null", "array"], "items": { - "type": "string" + "type": ["null", "string"] } }, "tags": { "type": ["null", "array"], "items": { - "type": "string" + "type": ["null", "string"] } }, "teammate": { @@ -99,10 +101,10 @@ "type": ["null", "string"] }, "created": { - "type": "boolean" + "type": ["null", "boolean"] }, "id": { - "type": "string" + "type": ["null", "string"] }, "orbit_level": { "type": ["null", "integer"] @@ -140,16 +142,21 @@ "topics": { "type": ["null", "array"], "items": { - "type": "string" + "type": ["null", "string"] } }, "languages": { "type": ["null", "array"], "items": { - "type": "string" + "type": ["null", "string"] } } } + }, + "relationships": { + "type": ["null", "object"], + "additionalProperties": true, + "properties": {} } } } diff --git a/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/workspace.json b/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/workspace.json index 79a2ed3d7982..93d06acac270 100644 --- a/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/workspace.json +++ b/airbyte-integrations/connectors/source-orbit/source_orbit/schemas/workspace.json @@ -1,36 +1,45 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { - "type": "string" + "type": ["null", "string"] }, "type": { - "type": "string" + "type": ["null", "string"] + }, + "relationships": { + "type": ["null", "object"], + "additionalProperties": true, + "properties": {} }, "attributes": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "name": { - "type": "string" + "type": ["null", "string"] }, "slug": { - "type": "string" + "type": ["null", "string"] }, "updated_at": { - "type": "string" + "type": ["null", "string"] }, "created_at": { - 
"type": "string" + "type": ["null", "string"] }, "members_count": { - "type": "integer" + "type": ["null", "integer"] }, "activities_count": { - "type": "integer" + "type": ["null", "integer"] }, "tags": { - "type": "object" + "type": ["null", "object"], + "additionalProperties": true, + "properties": {} } } } diff --git a/airbyte-integrations/connectors/source-orbit/source_orbit/spec.yaml b/airbyte-integrations/connectors/source-orbit/source_orbit/spec.yaml index 8277b6d61539..61dbd1b3357d 100644 --- a/airbyte-integrations/connectors/source-orbit/source_orbit/spec.yaml +++ b/airbyte-integrations/connectors/source-orbit/source_orbit/spec.yaml @@ -6,7 +6,7 @@ connectionSpecification: required: - api_token - workspace - additionalProperties: false + additionalProperties: true properties: api_token: type: string diff --git a/airbyte-integrations/connectors/source-pagerduty/.dockerignore b/airbyte-integrations/connectors/source-pagerduty/.dockerignore new file mode 100644 index 000000000000..2d661c3708ef --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_pagerduty +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-pagerduty/Dockerfile b/airbyte-integrations/connectors/source-pagerduty/Dockerfile new file mode 100644 index 000000000000..1471709cc025 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_pagerduty ./source_pagerduty + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.2.0 +LABEL io.airbyte.name=airbyte/source-pagerduty diff --git a/airbyte-integrations/connectors/source-pagerduty/README.md b/airbyte-integrations/connectors/source-pagerduty/README.md new file mode 100644 index 000000000000..f71f30d4dd76 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/README.md @@ -0,0 +1,82 @@ +# Pagerduty Source + +This is the repository for the Pagerduty configuration based source connector. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/sources/pagerduty). + +## Local development + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. 
+ +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-pagerduty:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/sources/pagerduty) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_pagerduty/spec.yaml` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source pagerduty test creds` +and place them into `secrets/config.json`. + +### Locally running the connector Docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-pagerduty:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-pagerduty:airbyteDocker +``` +When building via Gradle, the Docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-pagerduty:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-pagerduty:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-pagerduty:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-pagerduty:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside `integration_tests/acceptance.py`. + +To run your integration tests with Docker, run: +``` +./acceptance-test-docker.sh +``` + +### Using Gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-pagerduty:check +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-pagerduty:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups: +* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list. +* dependencies required for testing go in the `TEST_REQUIREMENTS` list. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million-dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1.
Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-pagerduty/__init__.py b/airbyte-integrations/connectors/source-pagerduty/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-pagerduty/acceptance-test-config.yml b/airbyte-integrations/connectors/source-pagerduty/acceptance-test-config.yml new file mode 100644 index 000000000000..a2899a14a684 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/acceptance-test-config.yml @@ -0,0 +1,37 @@ +# See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-pagerduty:dev +test_strictness_level: low +acceptance_tests: + spec: + tests: + - spec_path: "source_pagerduty/spec.yaml" + backward_compatibility_tests_config: + disable_for_version: 0.1.23 + connection: + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + tests: + - config_path: "secrets/config.json" + backward_compatibility_tests_config: + disable_for_version: 0.1.23 + basic_read: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + incremental: + bypass_reason: "This connector does not implement incremental sync" + # TODO uncomment this block this block if your connector implements incremental sync: + # tests: + # - config_path: "secrets/config.json" + # configured_catalog_path: "integration_tests/configured_catalog.json" + # future_state: + # future_state_path: "integration_tests/abnormal_state.json" + full_refresh: + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-pagerduty/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-pagerduty/acceptance-test-docker.sh new file mode 100755 index 000000000000..b6d65deeccb4 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/acceptance-test-docker.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh + +source "$(git rev-parse --show-toplevel)/airbyte-integrations/bases/connector-acceptance-test/acceptance-test-docker.sh" diff --git a/airbyte-integrations/connectors/source-pagerduty/icon.svg b/airbyte-integrations/connectors/source-pagerduty/icon.svg new file mode 100644 index 000000000000..98da64d7a2ef --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/icon.svg @@ -0,0 +1,2 @@ + +PagerDuty icon \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/__init__.py b/airbyte-integrations/connectors/source-pagerduty/integration_tests/__init__.py new file mode 100644 index 000000000000..c941b3045795 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/__init__.py @@ -0,0 +1,3 @@ +# +# Copyright (c) 
2023 Airbyte, Inc., all rights reserved. +# diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-pagerduty/integration_tests/abnormal_state.json new file mode 100644 index 000000000000..52b0f2c2118f --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/abnormal_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "todo-abnormal-value" + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-pagerduty/integration_tests/acceptance.py new file mode 100644 index 000000000000..9e6409236281 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/acceptance.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import pytest + +pytest_plugins = ("connector_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + # TODO: setup test dependencies if needed. otherwise remove the TODO comments + yield + # TODO: clean up test dependencies diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-pagerduty/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..990545d018b3 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/configured_catalog.json @@ -0,0 +1,58 @@ +{ + "streams": [ + { + "stream": { + "name": "incidents", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "incident_logs", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "priorities", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "teams", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "services", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "users", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-pagerduty/integration_tests/expected_records.jsonl new file mode 100644 index 000000000000..ac09476cd5aa --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/expected_records.jsonl @@ -0,0 +1,6 @@ +{"stream": "incidents", "data": 
{"incident_number":1,"title":"admin","description":"admin","created_at":"2023-10-07T07:25:26Z","updated_at":"2023-10-07T07:25:26Z","status":"triggered","incident_key":"f311ae09b6744daf8580aa92d2cc1b1f","service":{"id":"P3VUFA7","type":"service_reference","summary":"asd","self":"https://api.pagerduty.com/services/P3VUFA7","html_url":"https://dev-pixil.pagerduty.com/service-directory/P3VUFA7"},"assignments":[{"at":"2023-10-07T07:25:26Z","assignee":{"id":"PLOD9FO","type":"user_reference","summary":"tester tester","self":"https://api.pagerduty.com/users/PLOD9FO","html_url":"https://dev-pixil.pagerduty.com/users/PLOD9FO"}}],"assigned_via":"escalation_policy","last_status_change_at":"2023-10-07T07:25:26Z","resolved_at":null,"first_trigger_log_entry":{"id":"R21I9CLEE8JB0F7ICF7XBE1CMI","type":"trigger_log_entry_reference","summary":"Triggered through the website.","self":"https://api.pagerduty.com/log_entries/R21I9CLEE8JB0F7ICF7XBE1CMI","html_url":"https://dev-pixil.pagerduty.com/incidents/Q2U8NVNBMCM7JW/log_entries/R21I9CLEE8JB0F7ICF7XBE1CMI"},"alert_counts":{"all":0,"triggered":0,"resolved":0},"is_mergeable":true,"escalation_policy":{"id":"PQEF2Q3","type":"escalation_policy_reference","summary":"asd-ep","self":"https://api.pagerduty.com/escalation_policies/PQEF2Q3","html_url":"https://dev-pixil.pagerduty.com/escalation_policies/PQEF2Q3"},"teams":[],"pending_actions":[],"acknowledgements":[],"basic_alert_grouping":null,"alert_grouping":null,"last_status_change_by":{"id":"P3VUFA7","type":"service_reference","summary":"asd","self":"https://api.pagerduty.com/services/P3VUFA7","html_url":"https://dev-pixil.pagerduty.com/service-directory/P3VUFA7"},"priority":{"id":"PXS7CUY","type":"priority","summary":"P1","self":"https://api.pagerduty.com/priorities/PXS7CUY","html_url":null,"account_id":"PLANXGO","color":"a8171c","created_at":"2023-10-07T07:18:42Z","description":"","name":"P1","order":500000000,"schema_version":0,"updated_at":"2023-10-07T07:18:42Z"},"incidents_responders":[],"responder_requests":[],"subscriber_requests":[],"urgency":"high","id":"Q2U8NVNBMCM7JW","type":"incident","summary":"[#1] admin","self":"https://api.pagerduty.com/incidents/Q2U8NVNBMCM7JW","html_url":"https://dev-pixil.pagerduty.com/incidents/Q2U8NVNBMCM7JW"}, "emitted_at": 1693501393086} +{"stream": "incident_logs", "data": {"id":"RNUYTXSTD6TFUACXJH8FS2I4TM","type":"annotate_log_entry","summary":"Noteaddedbytestertester.","self":"https://api.pagerduty.com/log_entries/RNUYTXSTD6TFUACXJH8FS2I4TM","html_url":null,"created_at":"2023-10-07T07:28:46Z","agent":{"id":"PLOD9FO","type":"user_reference","summary":"testertester","self":"https://api.pagerduty.com/users/PLOD9FO","html_url":"https://dev-pixil.pagerduty.com/users/PLOD9FO"},"channel":{"type":"note","summary":"sdgsdf"},"service":{"id":"P3VUFA7","type":"service_reference","summary":"asd","self":"https://api.pagerduty.com/services/P3VUFA7","html_url":"https://dev-pixil.pagerduty.com/service-directory/P3VUFA7"},"incident":{"id":"Q2U8NVNBMCM7JW","type":"incident_reference","summary":"[#1]admin","self":"https://api.pagerduty.com/incidents/Q2U8NVNBMCM7JW","html_url":"https://dev-pixil.pagerduty.com/incidents/Q2U8NVNBMCM7JW"},"teams":[],"contexts":[]}, "emitted_at": 1693501393086} +{"stream": "priorities", "data": 
{"id":"PXS7CUY","type":"priority","summary":"P1","self":"https://api.pagerduty.com/priorities/PXS7CUY","html_url":null,"account_id":"PLANXGO","color":"a8171c","created_at":"2023-10-07T07:18:42Z","description":"","name":"P1","order":500000000,"schema_version":0,"updated_at":"2023-10-07T07:18:42Z"}, "emitted_at": 1693501393086} +{"stream": "services", "data": {"id":"P3VUFA7","name":"asd","description":"asd","created_at":"2023-10-07T07:25:04Z","updated_at":"2023-10-07T07:25:04Z","status":"warning","teams":[],"alert_creation":"create_alerts_and_incidents","addons":[],"scheduled_actions":[],"support_hours":null,"last_incident_timestamp":"2023-10-07T07:25:26Z","escalation_policy":{"id":"PQEF2Q3","type":"escalation_policy_reference","summary":"asd-ep","self":"https://api.pagerduty.com/escalation_policies/PQEF2Q3","html_url":"https://dev-pixil.pagerduty.com/escalation_policies/PQEF2Q3"},"incident_urgency_rule":{"type":"constant","urgency":"high"},"acknowledgement_timeout":null,"auto_resolve_timeout":null,"alert_grouping":"intelligent","alert_grouping_timeout":null,"alert_grouping_parameters":{"type":"intelligent","config":{"time_window":300,"recommended_time_window":300}},"integrations":[{"id":"PXMYB2G","type":"events_api_v2_inbound_integration_reference","summary":"EventsAPIV2","self":"https://api.pagerduty.com/services/P3VUFA7/integrations/PXMYB2G","html_url":"https://dev-pixil.pagerduty.com/services/P3VUFA7/integrations/PXMYB2G"}],"response_play":null,"type":"service","summary":"asd","self":"https://api.pagerduty.com/services/P3VUFA7","html_url":"https://dev-pixil.pagerduty.com/service-directory/P3VUFA7"}, "emitted_at": 1693501393086} +{"stream": "teams", "data": {"id":"PLYRZEM","name":"asd","description":null,"type":"team","summary":"asd","self":"https://api.pagerduty.com/teams/PLYRZEM","html_url":"https://dev-pixil.pagerduty.com/teams/PLYRZEM","default_role":"manager","parent":null}, "emitted_at": 1693501393086} +{"stream": "users", "data": 
{"name":"pixil","email":"hesor11831@gekme.com","time_zone":"Etc/UTC","color":"red","avatar_url":"https://secure.gravatar.com/avatar/64b1ea9c6c0ec582d946b5cfecbbfd4f.png?d=mm&r=PG","billed":true,"role":"admin","description":null,"invitation_sent":true,"job_title":null,"teams":[{"id":"PLYRZEM","type":"team_reference","summary":"asd","self":"https://api.pagerduty.com/teams/PLYRZEM","html_url":"https://dev-pixil.pagerduty.com/teams/PLYRZEM"}],"contact_methods":[{"id":"PD9J7I5","type":"email_contact_method_reference","summary":"Default","self":"https://api.pagerduty.com/users/PHCS92L/contact_methods/PD9J7I5","html_url":null}],"notification_rules":[{"id":"POHYPIT","type":"assignment_notification_rule_reference","summary":"0minutes:channelPD9J7I5","self":"https://api.pagerduty.com/users/PHCS92L/notification_rules/POHYPIT","html_url":null},{"id":"PSTJQXG","type":"assignment_notification_rule_reference","summary":"0minutes:channelPD9J7I5","self":"https://api.pagerduty.com/users/PHCS92L/notification_rules/PSTJQXG","html_url":null}],"coordinated_incidents":[{"incident":{"id":"Q2U8NVNBMCM7JW","type":"incident_reference","summary":"[#1]admin","self":"https://api.pagerduty.com/incidents/Q2U8NVNBMCM7JW","html_url":"https://dev-pixil.pagerduty.com/incidents/Q2U8NVNBMCM7JW"},"requester":{"id":"PLOD9FO","type":"user_reference","summary":"testertester","self":"https://api.pagerduty.com/users/PLOD9FO","html_url":"https://dev-pixil.pagerduty.com/users/PLOD9FO"},"message":"Pleasehelpwith\"admin\"","state":"pending","requested_at":"2023-10-07T07:28:35Z"}],"id":"PHCS92L","type":"user","summary":"pixil","self":"https://api.pagerduty.com/users/PHCS92L","html_url":"https://dev-pixil.pagerduty.com/users/PHCS92L"}, "emitted_at": 1693501393086} diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-pagerduty/integration_tests/invalid_config.json new file mode 100644 index 000000000000..c1228206caf3 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/invalid_config.json @@ -0,0 +1,3 @@ +{ + "token": "xxxxxxx" +} diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-pagerduty/integration_tests/sample_config.json new file mode 100644 index 000000000000..41bdf992eab6 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/sample_config.json @@ -0,0 +1,3 @@ +{ + "token": "xxxxxxxxxxx" +} diff --git a/airbyte-integrations/connectors/source-pagerduty/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-pagerduty/integration_tests/sample_state.json new file mode 100644 index 000000000000..3587e579822d --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/integration_tests/sample_state.json @@ -0,0 +1,5 @@ +{ + "todo-stream-name": { + "todo-field-name": "value" + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/main.py b/airbyte-integrations/connectors/source-pagerduty/main.py new file mode 100644 index 000000000000..61d193268f6b --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_pagerduty import SourcePagerduty + +if __name__ == "__main__": + source = SourcePagerduty() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-pagerduty/metadata.yaml b/airbyte-integrations/connectors/source-pagerduty/metadata.yaml new file mode 100644 index 000000000000..403e4bd393d7 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/metadata.yaml @@ -0,0 +1,28 @@ +data: + allowedHosts: + hosts: + - api.pagerduty.com + registries: + oss: + enabled: false + cloud: + enabled: false + connectorSubtype: api + connectorType: source + definitionId: 2544ac39-02be-4bf5-82ad-f52bbb833bf5 + dockerImageTag: 0.2.0 + dockerRepository: airbyte/source-pagerduty + githubIssueLabel: source-pagerduty + icon: pagerduty.svg + license: MIT + name: Pagerduty + releaseDate: 2023-10-10 + releaseStage: alpha + supportLevel: community + documentationUrl: https://docs.airbyte.com/integrations/sources/pagerduty + tags: + - language:lowcode + ab_internal: + sl: 100 + ql: 100 +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-pagerduty/requirements.txt b/airbyte-integrations/connectors/source-pagerduty/requirements.txt new file mode 100644 index 000000000000..cc57334ef619 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/connector-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-pagerduty/setup.py b/airbyte-integrations/connectors/source-pagerduty/setup.py new file mode 100644 index 000000000000..aec396ac036d --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/setup.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk"] + +TEST_REQUIREMENTS = [ + "requests-mock~=1.9.3", + "pytest~=6.2", + "pytest-mock~=3.6.1", +] + +setup( + name="source_pagerduty", + description="Source implementation for Pagerduty.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/__init__.py b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/__init__.py new file mode 100644 index 000000000000..27f6419a3392 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourcePagerduty + +__all__ = ["SourcePagerduty"] diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/manifest.yaml b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/manifest.yaml new file mode 100644 index 000000000000..39c0f41f49c6 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/manifest.yaml @@ -0,0 +1,110 @@ +version: "0.29.0" + +definitions: + selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: ["{{ parameters.extractorPath }}"] + + requester: + type: HttpRequester + url_base: "https://api.pagerduty.com" + http_method: "GET" + authenticator: + type: "ApiKeyAuthenticator" + header: "Authorization" + api_token: "Token token={{ config['token'] }}" + + retriever: + type: SimpleRetriever + record_selector: + $ref: "#/definitions/selector" + paginator: + type: NoPagination + requester: + $ref: "#/definitions/requester" + + base_stream: + type: DeclarativeStream + retriever: + $ref: "#/definitions/retriever" + + incidents_stream: + $ref: "#/definitions/base_stream" + $parameters: + name: "incidents" + primary_key: "id" + extractorPath: "incidents" + path: "/incidents" + + incidents_partition_router: + type: SubstreamPartitionRouter + parent_stream_configs: + - stream: "#/definitions/incidents_stream" + parent_key: "id" + partition_field: "incident_id" + + incident_logs_stream: + $ref: "#/definitions/base_stream" + $parameters: + name: "incident_logs" + primary_key: "id" + extractorPath: "log_entries" + retriever: + $ref: "#/definitions/retriever" + partition_router: + $ref: "#/definitions/incidents_partition_router" + requester: + $ref: "#/definitions/requester" + path: "/incidents/{{ stream_partition.incident_id }}/log_entries" + + teams_stream: + $ref: "#/definitions/base_stream" + $parameters: + name: "teams" + primary_key: "id" + extractorPath: "teams" + path: "/teams" + + services_stream: + $ref: "#/definitions/base_stream" + $parameters: + name: "services" + primary_key: "id" + extractorPath: "services" + path: "/services" + + users_stream: + $ref: "#/definitions/base_stream" + $parameters: + name: "users" + primary_key: "id" + extractorPath: "users" + path: "/users" + + priorities_stream: + $ref: "#/definitions/base_stream" + $parameters: + name: "priorities" + primary_key: "id" + extractorPath: "priorities" + path: "/priorities" + +streams: + - "#/definitions/incidents_stream" + - "#/definitions/incident_logs_stream" + - "#/definitions/teams_stream" + - "#/definitions/services_stream" + - "#/definitions/users_stream" + - "#/definitions/priorities_stream" + +check: + type: CheckStream + stream_names: + - "incidents" + - "incident_logs" + - "teams" + - "services" + - "users" + - "priorities" diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/incident_logs.json b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/incident_logs.json new file mode 100644 index 000000000000..6cd2ff5583b9 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/incident_logs.json @@ -0,0 +1,380 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Incident Logs schema", + "additionalProperties": true, + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": 
["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "agent": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "channel": { + "type": ["null", "object"], + "properties": { + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "notification": { + "type": ["null", "object"], + "properties": { + "type": { + "type": ["null", "string"] + }, + "address": { + "type": ["null", "string"] + }, + "conferenceAddress": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + } + } + }, + "old_priority": { + "type": ["null", "string"] + }, + "new_priority": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "account_id": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "order": { + "type": ["null", "number"] + }, + "schema_version": { + "type": ["null", "number"] + }, + "updated_at": { + "type": ["null", "string"] + } + } + } + } + }, + "service": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "incident": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "teams": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "contexts": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "message": { + "type": ["null", "string"] + }, + "responder": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "channels": { + "type": ["null", "string"] + }, + "responders_list": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + }, + "escalation_policy": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "escalation_rules": { + "type": 
["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "escalation_delay_in_minutes": { + "type": ["null", "number"] + }, + "targets": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + } + } + } + }, + "services": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + }, + "num_loops": { + "type": ["null", "number"] + }, + "teams": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "description": { + "type": ["null", "string"] + }, + "on_call_handoff_notifications": { + "type": ["null", "string"] + }, + "privilege": { + "type": ["null", "string"] + } + } + }, + "level": { + "type": ["null", "number"] + }, + "action": { + "type": ["null", "string"] + }, + "event_details": { + "type": ["null", "object"], + "properties": { + "description": { + "type": ["null", "string"] + } + } + }, + "user": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "assignees": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + } + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/incidents.json b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/incidents.json new file mode 100644 index 000000000000..26e281c5c413 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/incidents.json @@ -0,0 +1,549 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Incidents Schema", + "additionalProperties": true, + "type": ["object", "null"], + "properties": { + "incident_number": { + "type": ["number", "null"] + }, + "title": { + "type": ["string", "null"] + }, + "description": { + "type": ["string", "null"] + }, + "created_at": { + "type": ["string", "null"] + }, + "updated_at": { + "type": ["string", "null"] + }, + "status": { + "type": ["string", "null"] + }, + "incident_key": { + "type": ["string", "null"] + }, + "service": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "assignments": { + "type": ["array", "null"], + "items": { + "type": ["object", "null"], + "properties": { + "at": { + "type": ["string", "null"] + }, + "assignee": { + "type": ["object", "null"], + "properties": { 
+ "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + } + } + } + }, + "assigned_via": { + "type": ["string", "null"] + }, + "last_status_change_at": { + "type": ["string", "null"] + }, + "resolved_at": { + "type": ["string", "null"] + }, + "first_trigger_log_entry": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "alert_counts": { + "type": ["object", "null"], + "properties": { + "all": { + "type": ["number", "null"] + }, + "triggered": { + "type": ["number", "null"] + }, + "resolved": { + "type": ["number", "null"] + } + } + }, + "is_mergeable": { + "type": "boolean" + }, + "escalation_policy": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "teams": { + "type": ["array", "null"], + "items": { + "type": ["string", "null"] + } + }, + "pending_actions": { + "type": ["array", "null"], + "items": { + "type": ["string", "null"] + } + }, + "acknowledgements": { + "type": ["array", "null"], + "items": { + "type": ["object", "null"], + "properties": { + "at": { + "type": ["string", "null"] + }, + "acknowledger": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + } + } + } + }, + "basic_alert_grouping": { + "type": ["string", "null"] + }, + "alert_grouping": { + "type": ["string", "null"] + }, + "last_status_change_by": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "priority": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + }, + "account_id": { + "type": ["string", "null"] + }, + "color": { + "type": ["string", "null"] + }, + "created_at": { + "type": ["string", "null"] + }, + "description": { + "type": ["string", "null"] + }, + "name": { + "type": ["string", "null"] + }, + "order": { + "type": ["number", "null"] + }, + "schema_version": { + "type": ["number", "null"] + }, + "updated_at": { + "type": ["string", "null"] + } + } + }, + "incidents_responders": { + "type": ["array", "null"], + "items": { + "type": ["object", "null"], + "properties": { + "state": { + "type": ["string", "null"] + }, + "user": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", 
"null"] + }, + "html_url": { + "type": ["string", "null"] + }, + "avatar_url": { + "type": ["string", "null"] + }, + "job_title": { + "type": ["string", "null"] + } + } + }, + "incident": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "updated_at": { + "type": ["string", "null"] + }, + "message": { + "type": ["string", "null"] + }, + "requester": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + }, + "avatar_url": { + "type": ["string", "null"] + }, + "job_title": { + "type": ["string", "null"] + } + } + }, + "requested_at": { + "type": ["string", "null"] + }, + "escalation_policy_requests": { + "type": ["array", "null"], + "items": { + "type": ["string", "null"] + } + } + } + } + }, + "responder_requests": { + "type": ["array", "null"], + "items": { + "type": ["object", "null"], + "properties": { + "incident": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "requester": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "requested_at": { + "type": ["string", "null"] + }, + "message": { + "type": ["string", "null"] + }, + "responder_request_targets": { + "type": ["array", "null"], + "items": { + "type": ["object", "null"], + "properties": { + "responder_request_target": { + "type": ["object", "null"], + "properties": { + "type": { + "type": ["string", "null"] + }, + "id": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "incidents_responders": { + "type": ["array", "null"], + "items": { + "type": ["object", "null"], + "properties": { + "state": { + "type": ["string", "null"] + }, + "user": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + }, + "avatar_url": { + "type": ["string", "null"] + }, + "job_title": { + "type": ["string", "null"] + } + } + }, + "incident": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } + }, + "updated_at": { + "type": ["string", "null"] + }, + "message": { + "type": ["string", "null"] + }, + "requester": { + "type": ["object", "null"], + "properties": { + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": 
["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + }, + "avatar_url": { + "type": ["string", "null"] + }, + "job_title": { + "type": ["string", "null"] + } + } + }, + "requested_at": { + "type": ["string", "null"] + } + } + } + } + } + } + } + } + } + } + } + }, + "subscriber_requests": { + "type": ["array", "null"], + "items": { + "type": ["string", "null"] + } + }, + "urgency": { + "type": ["string", "null"] + }, + "id": { + "type": ["string", "null"] + }, + "type": { + "type": ["string", "null"] + }, + "summary": { + "type": ["string", "null"] + }, + "self": { + "type": ["string", "null"] + }, + "html_url": { + "type": ["string", "null"] + } + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/priorities.json b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/priorities.json new file mode 100644 index 000000000000..98267d5eb806 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/priorities.json @@ -0,0 +1,47 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Priorities schema", + "additionalProperties": true, + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "account_id": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "order": { + "type": ["null", "number"] + }, + "schema_version": { + "type": ["null", "number"] + }, + "updated_at": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/services.json b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/services.json new file mode 100644 index 000000000000..c512977ce396 --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/services.json @@ -0,0 +1,153 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Services schema", + "additionalProperties": true, + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "updated_at": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "teams": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "alert_creation": { + "type": ["null", "string"] + }, + "addons": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "scheduled_actions": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "support_hours": { + "type": ["null", "string"] + }, + "last_incident_timestamp": { + "type": ["null", "string"] + }, + "escalation_policy": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "incident_urgency_rule": { + "type": ["null", "object"], + 
"properties": { + "type": { + "type": ["null", "string"] + }, + "urgency": { + "type": ["null", "string"] + } + } + }, + "acknowledgement_timeout": { + "type": ["null", "string"] + }, + "auto_resolve_timeout": { + "type": ["null", "string"] + }, + "alert_grouping": { + "type": ["null", "string"] + }, + "alert_grouping_timeout": { + "type": ["null", "string"] + }, + "alert_grouping_parameters": { + "type": ["null", "object"], + "properties": { + "type": { + "type": ["null", "string"] + }, + "config": { + "type": ["null", "object"], + "properties": { + "time_window": { + "type": ["null", "number"] + }, + "recommended_time_window": { + "type": ["null", "number"] + } + } + } + } + }, + "integrations": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + }, + "response_play": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/teams.json b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/teams.json new file mode 100644 index 000000000000..fb3ec34e852b --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/teams.json @@ -0,0 +1,35 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Teams Schema", + "additionalProperties": true, + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "default_role": { + "type": ["null", "string"] + }, + "parent": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/users.json b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/users.json new file mode 100644 index 000000000000..3e8e0e24c34b --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/schemas/users.json @@ -0,0 +1,179 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Users Schema", + "additionalProperties": true, + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] + }, + "time_zone": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "billed": { + "type": "boolean" + }, + "role": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "invitation_sent": { + "type": "boolean" + }, + "job_title": { + "type": ["null", "string"] + }, + "teams": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", 
"string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + }, + "contact_methods": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + }, + "notification_rules": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + } + }, + "coordinated_incidents": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "incident": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "requester": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } + }, + "message": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] + }, + "requested_at": { + "type": ["null", "string"] + } + } + } + }, + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "summary": { + "type": ["null", "string"] + }, + "self": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/source.py b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/source.py new file mode 100644 index 000000000000..96ff2d87bbfb --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/source.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource + +""" +This file provides the necessary constructs to interpret a provided declarative YAML configuration file into +source connector. + +WARNING: Do not modify this file. 
+""" + + +# Declarative Source +class SourcePagerduty(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "manifest.yaml"}) diff --git a/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/spec.yaml b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/spec.yaml new file mode 100644 index 000000000000..b40b90d2779c --- /dev/null +++ b/airbyte-integrations/connectors/source-pagerduty/source_pagerduty/spec.yaml @@ -0,0 +1,77 @@ +documentationUrl: https://docs.faros.ai +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: PagerDuty Spec + type: object + required: + - token + additionalProperties: true + properties: + token: + type: string + title: API key + description: API key for PagerDuty API authentication + airbyte_secret: true + cutoff_days: + type: integer + title: Cutoff Days + default: 90 + description: Fetch pipelines updated in the last number of days + page_size: + type: integer + minimum: 1 + maximum: 25 + default: 25 + title: Page Size + description: page size to use when querying PagerDuty API + incident_log_entries_overview: + type: boolean + title: Incident Log Entries Overview + description: + If true, will return a subset of log entries that show only the + most important changes to the incident. + default: true + default_severity: + type: string + title: Severity category + description: A default severity category if not present + examples: + - Sev1 + - Sev2 + - Sev3 + - Sev4 + - Sev5 + - Custom + pattern: "^(Sev[0-5])?(Custom)?$" + exclude_services: + type: array + items: + type: string + title: Exclude Services + examples: + - service-1 + - service-2 + description: + List of PagerDuty service names to ignore incidents from. If not + set, all incidents will be pulled. + service_details: + type: array + items: + type: string + enum: + - escalation_policies + - teams + - integrations + - auto_pause_notifications_parameters + title: Service Details + description: List of PagerDuty service additional details to include. + max_retries: + type: integer + minimum: 0 + maximum: 8 + default: 5 + title: Max Retries + description: + Maximum number of PagerDuty API request retries to perform upon + connection errors. The source will pause for an exponentially increasing number + of seconds before retrying. 
diff --git a/airbyte-integrations/connectors/source-timely/Dockerfile b/airbyte-integrations/connectors/source-timely/Dockerfile index 82cb0a6003e4..159d54a6cc85 100644 --- a/airbyte-integrations/connectors/source-timely/Dockerfile +++ b/airbyte-integrations/connectors/source-timely/Dockerfile @@ -34,5 +34,5 @@ COPY source_timely ./source_timely ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.2.0 LABEL io.airbyte.name=airbyte/source-timely diff --git a/airbyte-integrations/connectors/source-timely/metadata.yaml b/airbyte-integrations/connectors/source-timely/metadata.yaml index f0783a97a8b5..ee32015f005a 100644 --- a/airbyte-integrations/connectors/source-timely/metadata.yaml +++ b/airbyte-integrations/connectors/source-timely/metadata.yaml @@ -2,7 +2,7 @@ data: connectorSubtype: api connectorType: source definitionId: bc617b5f-1b9e-4a2d-bebe-782fd454a771 - dockerImageTag: 0.1.0 + dockerImageTag: 0.2.0 dockerRepository: airbyte/source-timely githubIssueLabel: source-timely icon: timely.svg diff --git a/airbyte-integrations/connectors/source-timely/source_timely/schemas/events.json b/airbyte-integrations/connectors/source-timely/source_timely/schemas/events.json index 29b5f586b99c..3eb8707a5863 100644 --- a/airbyte-integrations/connectors/source-timely/source_timely/schemas/events.json +++ b/airbyte-integrations/connectors/source-timely/source_timely/schemas/events.json @@ -1,6 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "invoice_id": { "type": ["null", "string"] @@ -11,6 +12,19 @@ "locked": { "type": ["null", "boolean"] }, + "creator_id": { + "type": ["null", "integer"] + }, + "state": { + "type": ["null", "object"], + "additionalProperties": true + }, + "timestamps": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, "created_id": { "type": ["null", "integer"] }, @@ -26,7 +40,8 @@ "airbyte_type": "timestamp_with_timezone" }, "estimated_cost": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "amount": { "type": ["null", "number"] @@ -46,13 +61,19 @@ "type": ["null", "integer"] }, "label_ids": { - "type": ["null", "array"] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "forecast_id": { "type": ["null", "string"] }, "user_ids": { - "type": ["null", "array"] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "timer_stopped_on": { "type": ["null", "integer"] @@ -65,7 +86,8 @@ "type": ["null", "integer"] }, "cost": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "amount": { "type": ["null", "number"] @@ -90,13 +112,20 @@ "type": ["null", "integer"] }, "project": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "required_label_ids": { - "type": ["null", "array"] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "labels": { - "type": ["null", "array"] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "invoice_by_budget": { "type": ["null", "boolean"] @@ -123,7 +152,10 @@ "type": ["null", "boolean"] }, "label_ids": { - "type": ["null", "array"] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "enable_labels": { "type": ["null", "string"] @@ -135,7 +167,8 @@ 
"type": ["null", "boolean"] }, "client": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "external_id": { "type": ["null", "string"] @@ -195,7 +228,8 @@ } }, "user": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "name": { "type": ["null", "string"] @@ -209,7 +243,8 @@ "airbyte_type": "timestamp_with_timezone" }, "avatar": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "medium": { "type": ["null", "string"] @@ -240,7 +275,8 @@ "type": ["null", "string"] }, "estimated_duration": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "total_minutes": { "type": ["null", "integer"] @@ -286,7 +322,8 @@ "type": ["null", "integer"] }, "duration": { - "type": "object", + "type": ["null", "object"], + "additionalProperties": true, "properties": { "total_minutes": { "type": ["null", "integer"] @@ -321,7 +358,10 @@ "type": ["null", "string"] }, "entry_ids": { - "type": ["null", "array"] + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } }, "hour_rate": { "type": ["null", "number"] @@ -334,6 +374,9 @@ }, "timer_started_on": { "type": ["null", "integer"] + }, + "updated_at": { + "type": ["null", "integer"] } } } diff --git a/airbyte-integrations/connectors/source-timely/source_timely/spec.json b/airbyte-integrations/connectors/source-timely/source_timely/spec.json index 98b085bdcf35..ee22cb0ae369 100644 --- a/airbyte-integrations/connectors/source-timely/source_timely/spec.json +++ b/airbyte-integrations/connectors/source-timely/source_timely/spec.json @@ -5,7 +5,7 @@ "title": "Timely Integration Spec", "type": "object", "required": ["account_id", "start_date", "bearer_token"], - "additionalProperties": false, + "additionalProperties": true, "properties": { "account_id": { "title": "account_id", diff --git a/build.gradle b/build.gradle index 54a66dd407ff..bee172871803 100644 --- a/build.gradle +++ b/build.gradle @@ -37,7 +37,6 @@ ext { version = System.getenv("VERSION") ?: env.VERSION image_tag = System.getenv("VERSION") ?: 'dev' skipSlowTests = (System.getProperty('skipSlowTests', 'false') != 'false') - } // Pyenv support. 
try { diff --git a/docs/connector-development/config-based/tutorial/0-getting-started.md b/docs/connector-development/config-based/tutorial/0-getting-started.md index e1a7b0fd64c9..5a8a940c2973 100644 --- a/docs/connector-development/config-based/tutorial/0-getting-started.md +++ b/docs/connector-development/config-based/tutorial/0-getting-started.md @@ -44,6 +44,7 @@ This can be done by signing up for the Free tier plan on [Exchange Rates Data AP - Python >= 3.9 - Docker must be running - NodeJS +- [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md#L1) CLI ## Next Steps diff --git a/docs/connector-development/config-based/tutorial/6-testing.md b/docs/connector-development/config-based/tutorial/6-testing.md index 5aaa3ef2814e..4bbb90e8ed01 100644 --- a/docs/connector-development/config-based/tutorial/6-testing.md +++ b/docs/connector-development/config-based/tutorial/6-testing.md @@ -27,11 +27,10 @@ and `integration_tests/abnormal_state.json` with } ``` -You can run the acceptance tests with the following commands: +You can run the [acceptance tests](https://github.com/airbytehq/airbyte/blob/master/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md#L1) with the following commands using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md#L1): ```bash -docker build . -t airbyte/source-exchange-rates-tutorial:dev -python -m pytest integration_tests -p integration_tests.acceptance +airbyte-ci connectors --use-remote-secrets=false --name source-exchange-rates-tutorial test ``` ## Next steps: diff --git a/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md b/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md index 722f69b6bd93..e22de18a1ce2 100644 --- a/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md +++ b/docs/connector-development/testing-connectors/connector-acceptance-tests-reference.md @@ -32,8 +32,40 @@ _Note: Not all types of tests work for all connectors, only configure the ones t Build your connector image if needed. -```text -docker build . +**Option A: Building the docker image with `airbyte-ci`** + +This is the preferred method for building and testing connectors. + +If you want to open source your connector we encourage you to use our [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) tool to build your connector. +It will not use a Dockerfile but will build the connector image from our [base image](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/README.md) and use our internal build logic to build an image from your Python connector code. + +Running `airbyte-ci connectors --name source- build` will build your connector image. +Once the command is done, you will find your connector image in your local docker host: `airbyte/source-:dev`. + + + +**Option B: Building the docker image with a Dockerfile** + +If you don't want to rely on `airbyte-ci` to build your connector, you can build the docker image using your own Dockerfile. This method is not preferred, and is not supported for certified connectors. + +Create a `Dockerfile` in the root of your connector directory. The `Dockerfile` should look something like this: +```Dockerfile + +FROM airbyte/python-connector-base:1.1.0 + +COPY . 
./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +# ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +# ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +``` + +Please use this as an example. This is not optimized. + +Build your image: +```bash +docker build . -t airbyte/source-example-python:dev ``` And test via one of the two following Options diff --git a/docs/connector-development/tutorials/building-a-python-source.md b/docs/connector-development/tutorials/building-a-python-source.md index dc86631782be..ce2a66cce0cc 100644 --- a/docs/connector-development/tutorials/building-a-python-source.md +++ b/docs/connector-development/tutorials/building-a-python-source.md @@ -128,22 +128,57 @@ python main.py read --config secrets/config.json --catalog sample_files/configur The nice thing about this approach is that you can iterate completely within in python. The downside is that you are not quite running your source as it will actually be run by Airbyte. Specifically you're not running it from within the docker container that will house it. -**Run the source using docker** -If you want to run your source exactly as it will be run by Airbyte \(i.e. within a docker container\), you can use the following commands from the connector module directory \(`airbyte-integrations/connectors/source-example-python`\): +** Build the source docker image** -```text -# First build the container +You have to build a docker image for your connector if you want to run your source exactly as it will be run by Airbyte. + +**Option A: Building the docker image with `airbyte-ci`** + +This is the preferred method for building and testing connectors. + +If you want to open source your connector we encourage you to use our [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) tool to build your connector. +It will not use a Dockerfile but will build the connector image from our [base image](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/README.md) and use our internal build logic to build an image from your Python connector code. + +Running `airbyte-ci connectors --name source- build` will build your connector image. +Once the command is done, you will find your connector image in your local docker host: `airbyte/source-:dev`. + + + +**Option B: Building the docker image with a Dockerfile** + +If you don't want to rely on `airbyte-ci` to build your connector, you can build the docker image using your own Dockerfile. This method is not preferred, and is not supported for certified connectors. + +Create a `Dockerfile` in the root of your connector directory. The `Dockerfile` should look something like this: +```Dockerfile + +FROM airbyte/python-connector-base:1.1.0 + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +# ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +# ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +``` + +Please use this as an example. This is not optimized. + +Build your image: +```bash docker build . 
-t airbyte/source-example-python:dev +``` -# Then use the following commands to run it +**Run the source docker image** + +``` docker run --rm airbyte/source-example-python:dev spec docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-example-python:dev check --config /secrets/config.json docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-example-python:dev discover --config /secrets/config.json docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/sample_files:/sample_files airbyte/source-example-python:dev read --config /secrets/config.json --catalog /sample_files/configured_catalog.json ``` -Note: Each time you make a change to your implementation you need to re-build the connector image. `docker build . -t airbyte/source-example-python:dev`. This ensures the new python code is added into the docker container. +Note: Each time you make a change to your implementation you need to re-build the connector image. This ensures the new python code is added into the docker container. The nice thing about this approach is that you are running your source exactly as it will be run by Airbyte. The tradeoff is that iteration is slightly slower, because you need to re-build the connector between each change. diff --git a/docs/connector-development/tutorials/cdk-speedrun.md b/docs/connector-development/tutorials/cdk-speedrun.md index d32544898191..d6caac36974f 100644 --- a/docs/connector-development/tutorials/cdk-speedrun.md +++ b/docs/connector-development/tutorials/cdk-speedrun.md @@ -231,10 +231,44 @@ python main.py read --config sample_files/config.json --catalog sample_files/con If all goes well, containerize it so you can use it in the UI: + +**Option A: Building the docker image with `airbyte-ci`** + +This is the preferred method for building and testing connectors. + +If you want to open source your connector we encourage you to use our [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) tool to build your connector. +It will not use a Dockerfile but will build the connector image from our [base image](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/README.md) and use our internal build logic to build an image from your Python connector code. + +Running `airbyte-ci connectors --name source- build` will build your connector image. +Once the command is done, you will find your connector image in your local docker host: `airbyte/source-:dev`. + + + +**Option B: Building the docker image with a Dockerfile** + +If you don't want to rely on `airbyte-ci` to build your connector, you can build the docker image using your own Dockerfile. This method is not preferred, and is not supported for certified connectors. + +Create a `Dockerfile` in the root of your connector directory. The `Dockerfile` should look something like this: +```Dockerfile + +FROM airbyte/python-connector-base:1.1.0 + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +# ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +# ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +``` + +Please use this as an example. This is not optimized. + +Build your image: ```bash -docker build . -t airbyte/source-python-http-example:dev +docker build . -t airbyte/source-example-python:dev ``` + You're done. 
Stop the clock :\) ## Further reading diff --git a/docs/connector-development/tutorials/cdk-tutorial-python-http/use-connector-in-airbyte.md b/docs/connector-development/tutorials/cdk-tutorial-python-http/use-connector-in-airbyte.md index 19f204275b50..db190ea87d3e 100644 --- a/docs/connector-development/tutorials/cdk-tutorial-python-http/use-connector-in-airbyte.md +++ b/docs/connector-development/tutorials/cdk-tutorial-python-http/use-connector-in-airbyte.md @@ -1,6 +1,46 @@ # Step 7: Use the Connector in Airbyte -To use your connector in your own installation of Airbyte, build the docker image for your container by running `docker build . -t airbyte/source-python-http-example:dev`. Then, follow the instructions from the [building a Python source tutorial](../building-a-python-source.md#step-11-add-the-connector-to-the-api-ui) for using the connector in the Airbyte UI, replacing the name as appropriate. +To use your connector in your own installation of Airbyte you have to build the docker image for your connector. + + + +**Option A: Building the docker image with `airbyte-ci`** + +This is the preferred method for building and testing connectors. + +If you want to open source your connector we encourage you to use our [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) tool to build your connector. +It will not use a Dockerfile but will build the connector image from our [base image](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/base_images/README.md) and use our internal build logic to build an image from your Python connector code. + +Running `airbyte-ci connectors --name source- build` will build your connector image. +Once the command is done, you will find your connector image in your local docker host: `airbyte/source-:dev`. + + + +**Option B: Building the docker image with a Dockerfile** + +If you don't want to rely on `airbyte-ci` to build your connector, you can build the docker image using your own Dockerfile. This method is not preferred, and is not supported for certified connectors. + +Create a `Dockerfile` in the root of your connector directory. The `Dockerfile` should look something like this: +```Dockerfile + +FROM airbyte/python-connector-base:1.1.0 + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +# ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +# ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +``` + +Please use this as an example. This is not optimized. + +Build your image: +```bash +docker build . -t airbyte/source-example-python:dev +``` + +Then, follow the instructions from the [building a Python source tutorial](../building-a-python-source.md#step-11-add-the-connector-to-the-api-ui) for using the connector in the Airbyte UI, replacing the name as appropriate. Note: your built docker image must be accessible to the `docker` daemon running on the Airbyte node. If you're doing this tutorial locally, these instructions are sufficient. Otherwise you may need to push your Docker image to Dockerhub. 
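Whichever build option you choose, it can help to confirm that the resulting image actually answers the Airbyte protocol commands before registering it in the UI. A quick check, assuming the example image tag used in the docs above:

```bash
# List the freshly built image and ask it for its spec
docker images airbyte/source-example-python
docker run --rm airbyte/source-example-python:dev spec
```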
diff --git a/docs/contributing-to-airbyte/resources/developing-locally.md b/docs/contributing-to-airbyte/resources/developing-locally.md index 2dee4f5de14e..9baa64d5e24c 100644 --- a/docs/contributing-to-airbyte/resources/developing-locally.md +++ b/docs/contributing-to-airbyte/resources/developing-locally.md @@ -107,9 +107,11 @@ In your local `airbyte` repository, run the following command: ``` - Then, build the connector image: -``` -docker build ./airbyte-integrations/connectors/ -t airbyte/:dev -``` + - Install our [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) tool to build your connector. + - Running `airbyte-ci connectors --name source- build` will build your connector image. + - Once the command is done, you will find your connector image in your local docker host: `airbyte/source-:dev`. + + :::info diff --git a/docs/deploying-airbyte/on-kubernetes-via-helm.md b/docs/deploying-airbyte/on-kubernetes-via-helm.md index a79f74e46e9d..1006ed9e3ee5 100644 --- a/docs/deploying-airbyte/on-kubernetes-via-helm.md +++ b/docs/deploying-airbyte/on-kubernetes-via-helm.md @@ -104,6 +104,8 @@ In order to do so, run the command: helm install %release_name% airbyte/airbyte ``` +**Note**: `release_name` should only contain lowercase letters and optionally dashes (`release_name` must start with a letter). + ### Custom deployment In order to customize your deployment, you need to create `values.yaml` file in the local folder and populate it with default configuration override values. diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index 09fa0380f79b..813c2c7d5c97 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -127,6 +127,7 @@ Now that you have set up the BigQuery destination connector, check out the follo | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 2.1.6 | 2023-10-23 | [\#31717](https://github.com/airbytehq/airbyte/pull/31717) | Remove inadvertent Destination v2 check | | 2.1.5 | 2023-10-17 | [\#30069](https://github.com/airbytehq/airbyte/pull/30069) | Staging destination async | | 2.1.4 | 2023-10-17 | [\#31191](https://github.com/airbytehq/airbyte/pull/31191) | Improve typing+deduping performance by filtering new raw records on extracted_at | | 2.1.3 | 2023-10-10 | [\#31358](https://github.com/airbytehq/airbyte/pull/31358) | Stringify array and object types for type:string column in final table | diff --git a/docs/integrations/sources/harness.md b/docs/integrations/sources/harness.md index f858f3617dbf..b6433e30483a 100644 --- a/docs/integrations/sources/harness.md +++ b/docs/integrations/sources/harness.md @@ -2,7 +2,7 @@ ## Overview -The Harness source is maintained by [Faros +The Harness source is migrated from [Faros AI](https://github.com/faros-ai/airbyte-connectors/tree/main/sources/harness-source). Please file any support requests on that repo to minimize response time from the maintainers. The source supports both Full Refresh and Incremental syncs. You @@ -13,19 +13,19 @@ the tables and columns you set up for replication, every time a sync is run. 
Only one stream is currently available from this source: -* [Executions](https://docs.harness.io/article/ba4vs50071-use-workflows-api) \(Incremental\) +* [Organization](https://apidocs.harness.io/tag/Organization#operation/getOrganizationList) If there are more endpoints you'd like Faros AI to support, please [create an issue.](https://github.com/faros-ai/airbyte-connectors/issues/new) ### Features -| Feature | Supported? | -| :--- | :--- | -| Full Refresh Sync | Yes | -| Incremental Sync | Yes | -| SSL connection | Yes | -| Namespaces | No | +| Feature | Supported? | +| :----------------- | :--------- | +| Full Refresh Sync | Yes | +| Incremental Sync | No | +| SSL connection | No | +| Namespaces | No | ### Performance considerations @@ -47,6 +47,7 @@ Key](https://ngdocs.harness.io/article/tdoad7xrh9-add-and-manage-api-keys#harnes ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.23 | 2021-11-16 | [153](https://github.com/faros-ai/airbyte-connectors/pull/153) | Add Harness source and Faros destination's converter | +| Version | Date | Pull Request | Subject | +| :--------- | :--------- | :------------------------------------------------------------------ | :---------------------------------------------------- | +| 0.1.0 | 2023-10-10 | [31103](https://github.com/airbytehq/airbyte/pull/31103) | Migrate to low code | +| 0.1.23 | 2021-11-16 | [153](https://github.com/faros-ai/airbyte-connectors/pull/153) | Add Harness source and Faros destination's converter | diff --git a/docs/integrations/sources/klarna.md b/docs/integrations/sources/klarna.md index a7699f9322f0..ff809bbb2a35 100644 --- a/docs/integrations/sources/klarna.md +++ b/docs/integrations/sources/klarna.md @@ -58,4 +58,5 @@ Connector will handle an issue with rate limiting as Klarna returns 429 status c | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:-----------------------------------------------| +| 0.2.0 | 2023-10-23 | [31003](https://github.com/airbytehq/airbyte/pull/31003) | Migrate to low-code | | 0.1.0 | 2022-10-24 | [18385](https://github.com/airbytehq/airbyte/pull/18385) | Klarna Settlements Payout and Transactions API | diff --git a/docs/integrations/sources/klaviyo-migrations.md b/docs/integrations/sources/klaviyo-migrations.md new file mode 100644 index 000000000000..3bae687b326a --- /dev/null +++ b/docs/integrations/sources/klaviyo-migrations.md @@ -0,0 +1,6 @@ +# Klaviyo Migration Guide + +## Upgrading to 1.0.0 + +`event_properties/items/quantity` for `Events` stream is changed from `integer` to `number`. +For a smooth migration, data reset and schema refresh are needed. 
\ No newline at end of file diff --git a/docs/integrations/sources/klaviyo.md b/docs/integrations/sources/klaviyo.md index 0d84e07e63b7..b6408c4ffa99 100644 --- a/docs/integrations/sources/klaviyo.md +++ b/docs/integrations/sources/klaviyo.md @@ -52,21 +52,23 @@ The Klaviyo connector should not run into Klaviyo API limitations under normal u ## Changelog -| Version | Date | Pull Request | Subject | -|:---------|:-----------| :--------------------------------------------------------- |:------------------------------------------------------------------------------------------| -| `0.4.0` | 2023-10-18 | [31562](https://github.com/airbytehq/airbyte/pull/31562) | Add `archived` field to `Flows` stream | -| `0.3.3` | 2023-10-13 | [31379](https://github.com/airbytehq/airbyte/pull/31379) | Skip streams that the connector no longer has access to | -| `0.3.2` | 2023-06-20 | [27498](https://github.com/airbytehq/airbyte/pull/27498) | Do not store state in the future | -| `0.3.1` | 2023-06-08 | [27162](https://github.com/airbytehq/airbyte/pull/27162) | Anonymize check connection error message | -| `0.3.0` | 2023-02-18 | [23236](https://github.com/airbytehq/airbyte/pull/23236) | Add ` Email Templates` stream | -| `0.2.0` | 2023-03-13 | [22942](https://github.com/airbytehq/airbyte/pull/23968) | Add `Profiles` stream | -| `0.1.13` | 2023-02-13 | [22942](https://github.com/airbytehq/airbyte/pull/22942) | Specified date formatting in specification | -| `0.1.12` | 2023-01-30 | [22071](https://github.com/airbytehq/airbyte/pull/22071) | Fix `Events` stream schema | -| `0.1.11` | 2023-01-27 | [22012](https://github.com/airbytehq/airbyte/pull/22012) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| `0.1.10` | 2022-09-29 | [17422](https://github.com/airbytehq/airbyte/issues/17422) | Update CDK dependency | -| `0.1.9` | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/issues/17304) | Migrate to per-stream state. | -| `0.1.6` | 2022-07-20 | [14872](https://github.com/airbytehq/airbyte/issues/14872) | Increase test coverage | -| `0.1.5` | 2022-07-12 | [14617](https://github.com/airbytehq/airbyte/issues/14617) | Set max_retries = 10 for `lists` stream. | -| `0.1.4` | 2022-04-15 | [11723](https://github.com/airbytehq/airbyte/issues/11723) | Enhance klaviyo source for flows stream and update to events stream. | -| `0.1.3` | 2021-12-09 | [8592](https://github.com/airbytehq/airbyte/pull/8592) | Improve performance, make Global Exclusions stream incremental and enable Metrics stream. 
| -| `0.1.2` | 2021-10-19 | [6952](https://github.com/airbytehq/airbyte/pull/6952) | Update schema validation in SAT | +| Version | Date | Pull Request | Subject | +|:---------|:-----------| :--------------------------------------------------------- |:-------------------------------------------------------------------------------------------| +| `1.0.0` | 2023-10-18 | [31565](https://github.com/airbytehq/airbyte/pull/31565) | added new known fields for 'events' stream | +| `0.5.0` | 2023-10-19 | [31611](https://github.com/airbytehq/airbyte/pull/31611) | Add `date-time` format for `datetime` field in `Events` stream | +| `0.4.0` | 2023-10-18 | [31562](https://github.com/airbytehq/airbyte/pull/31562) | Add `archived` field to `Flows` stream | +| `0.3.3` | 2023-10-13 | [31379](https://github.com/airbytehq/airbyte/pull/31379) | Skip streams that the connector no longer has access to | +| `0.3.2` | 2023-06-20 | [27498](https://github.com/airbytehq/airbyte/pull/27498) | Do not store state in the future | +| `0.3.1` | 2023-06-08 | [27162](https://github.com/airbytehq/airbyte/pull/27162) | Anonymize check connection error message | +| `0.3.0` | 2023-02-18 | [23236](https://github.com/airbytehq/airbyte/pull/23236) | Add ` Email Templates` stream | +| `0.2.0` | 2023-03-13 | [22942](https://github.com/airbytehq/airbyte/pull/23968) | Add `Profiles` stream | +| `0.1.13` | 2023-02-13 | [22942](https://github.com/airbytehq/airbyte/pull/22942) | Specified date formatting in specification | +| `0.1.12` | 2023-01-30 | [22071](https://github.com/airbytehq/airbyte/pull/22071) | Fix `Events` stream schema | +| `0.1.11` | 2023-01-27 | [22012](https://github.com/airbytehq/airbyte/pull/22012) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| `0.1.10` | 2022-09-29 | [17422](https://github.com/airbytehq/airbyte/issues/17422) | Update CDK dependency | +| `0.1.9` | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/issues/17304) | Migrate to per-stream state. | +| `0.1.6` | 2022-07-20 | [14872](https://github.com/airbytehq/airbyte/issues/14872) | Increase test coverage | +| `0.1.5` | 2022-07-12 | [14617](https://github.com/airbytehq/airbyte/issues/14617) | Set max_retries = 10 for `lists` stream. | +| `0.1.4` | 2022-04-15 | [11723](https://github.com/airbytehq/airbyte/issues/11723) | Enhance klaviyo source for flows stream and update to events stream. | +| `0.1.3` | 2021-12-09 | [8592](https://github.com/airbytehq/airbyte/pull/8592) | Improve performance, make Global Exclusions stream incremental and enable Metrics stream. 
| +| `0.1.2` | 2021-10-19 | [6952](https://github.com/airbytehq/airbyte/pull/6952) | Update schema validation in SAT | diff --git a/docs/integrations/sources/open-exchange-rates.md b/docs/integrations/sources/open-exchange-rates.md index 75d00a26f1cd..348a54b6b48d 100644 --- a/docs/integrations/sources/open-exchange-rates.md +++ b/docs/integrations/sources/open-exchange-rates.md @@ -45,4 +45,5 @@ If you have `free` subscription plan \(you may check it [here](https://openexcha | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :--------------------------------------------------------------------------------------------------- | +| 0.2.0 | 2023-10-03 | [30983](https://github.com/airbytehq/airbyte/pull/30983) | Migrate to low code | | 0.1.0 | 2022-11-15 | [19436](https://github.com/airbytehq/airbyte/issues/19436) | Created CDK native Open Exchange Rates connector | diff --git a/docs/integrations/sources/orbit.md b/docs/integrations/sources/orbit.md index 370235692a7b..e1f41519d03f 100644 --- a/docs/integrations/sources/orbit.md +++ b/docs/integrations/sources/orbit.md @@ -45,5 +45,6 @@ The Orbit API Key should be available to you immediately as an Orbit user. | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.2.0 | 2023-10-23 | [14208](https://github.com/airbytehq/airbyte/pull/14208) | Update schema | | 0.1.1 | 2022-06-28 | [14208](https://github.com/airbytehq/airbyte/pull/14208) | Remove unused schema | | 0.1.0 | 2022-06-27 | [13390](https://github.com/airbytehq/airbyte/pull/13390) | Initial Release | diff --git a/docs/integrations/sources/pagerduty.md b/docs/integrations/sources/pagerduty.md index fb5e5660f39e..e517fecf87c8 100644 --- a/docs/integrations/sources/pagerduty.md +++ b/docs/integrations/sources/pagerduty.md @@ -48,6 +48,7 @@ Key](https://support.pagerduty.com/docs/generating-api-keys#section-generating-a ## Changelog -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.1.23 | 2021-11-12 | [125](https://github.com/faros-ai/airbyte-connectors/pull/125) | Add Pagerduty source and destination | +| Version | Date | Pull Request | Subject | +| :------- | :--------- | :----------------------------------------------------------------- | :----------------------------------- | +| 0.2.0 | 2023-10-20 | [31160](https://github.com/airbytehq/airbyte/pull/31160) | Migrate to low code | +| 0.1.23 | 2021-11-12 | [125](https://github.com/faros-ai/airbyte-connectors/pull/125) | Add Pagerduty source and destination | diff --git a/docs/integrations/sources/timely.md b/docs/integrations/sources/timely.md index 1bff1c2ba1d7..9e6c6be95e45 100644 --- a/docs/integrations/sources/timely.md +++ b/docs/integrations/sources/timely.md @@ -33,4 +33,5 @@ The Timely source connector supports the following [sync modes](https://docs.air | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :-------------- | +| 0.2.0 | 2023-10-23 | [13617](https://github.com/airbytehq/airbyte/pull/13617) | Fix schemas | | 0.1.0 | 2022-06-22 | [13617](https://github.com/airbytehq/airbyte/pull/13617) | Initial release | diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 0765b6b74fcf..7196ba282982 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -117,7 +117,7 @@ If you are upgrading from (i.e. 
your current version of Airbyte is) Airbyte vers Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. ```bash - docker run --rm -v /tmp:/config airbyte/migration:0.50.31 --\ + docker run --rm -v /tmp:/config airbyte/migration:0.50.32 --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/docs/snowflake-native-apps/facebook-marketing.md b/docs/snowflake-native-apps/facebook-marketing.md index f461d3cdf2df..7d06510d0b66 100644 --- a/docs/snowflake-native-apps/facebook-marketing.md +++ b/docs/snowflake-native-apps/facebook-marketing.md @@ -40,10 +40,10 @@ In order for the Facebook Marketing Connector by Airbyte to query Facebook's API By default the app will be installed using the name `AIRBYTE_FACEBOOK_MARKETING`, but if you renamed the app during installation, you will have to use that name as a reference. ::: -1. Create the database where the app will access the authorization. This database can be different from the database where the sync will output records. +1. Create the database where the app will access the authorization. ``` -CREATE DATABASE ; -USE ; +CREATE DATABASE airbyte_facebook_marketing_db; +USE airbyte_facebook_marketing_db; ``` 2. The native app will validate the output database and create it if it does not exist. In order to do that, the app needs access to the database: @@ -65,7 +65,7 @@ As of 2023-09-13, the [Snowflake documentation](https://docs.snowflake.com/en/sq 4. Once you have external access configured, you need define your authorization/authentication. Provide the credentials to the app as such: ``` -CREATE OR REPLACE SECRET integration_facebook_marketing_oauth +CREATE OR REPLACE SECRET airbyte_app_secret TYPE = GENERIC_STRING SECRET_STRING = '{ "access_token": "" @@ -75,22 +75,22 @@ CREATE OR REPLACE SECRET integration_facebook_marketing_oauth 5. Once the network rule and the secret are defined in Snowflake, you need to make them available to the app by using an external access integration. ``` -CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION integration_facebook_marketing +CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION airbyte_app_integration ALLOWED_NETWORK_RULES = (facebook_marketing_apis_network_rule) - ALLOWED_AUTHENTICATION_SECRETS = (integration_facebook_marketing_oauth) + ALLOWED_AUTHENTICATION_SECRETS = (airbyte_app_secret) ENABLED = true; ``` 6. Grant permission for the app to access the integration. ``` -GRANT USAGE ON INTEGRATION integration_facebook_marketing TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; +GRANT USAGE ON INTEGRATION airbyte_app_integration TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; ``` 7. Grant permissions for the app to access the database that houses the secret and read the secret. ``` -GRANT USAGE ON DATABASE TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; -GRANT USAGE ON SCHEMA TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; -GRANT READ ON SECRET integration_facebook_marketing_oauth TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; +GRANT USAGE ON DATABASE airbyte_facebook_marketing_db TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; +GRANT USAGE ON SCHEMA public TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; +GRANT READ ON SECRET airbyte_app_secret TO APPLICATION AIRBYTE_FACEBOOK_MARKETING; ``` @@ -101,18 +101,6 @@ Once you have access to the app, select `New Connection` and fill the following --- -`Secret` - -The name of the secret prefixed by which database and schema. 
Based on the previous steps: `..integration_facebook_marketing_oauth`. - ---- - -`External Access Integration` - -Name of the Snowflake integration where the secret and network rules are configured. Based on the previous steps: `integration_facebook_marketing`. - ---- - `account_id` The Facebook Ad account ID to use when pulling data from the Facebook Marketing API. The Ad account ID number is in the account dropdown menu or in your browser's address bar of your [Meta Ads Manager](https://adsmanager.facebook.com/adsmanager/). diff --git a/docs/snowflake-native-apps/linkedin-ads.md b/docs/snowflake-native-apps/linkedin-ads.md index 7cc1cf3a1ae0..84dcc34384f0 100644 --- a/docs/snowflake-native-apps/linkedin-ads.md +++ b/docs/snowflake-native-apps/linkedin-ads.md @@ -40,10 +40,10 @@ In order for the LinkedIn Ads Connector by Airbyte to query LinkedIn, you will n By default the app will be installed using the name `AIRBYTE_LINKEDIN_ADS`, but if you renamed the app during installation, you will have to use that name as a reference. ::: -1. Create the database where the app will access the authorization. This database can be different from the database where the sync will output records. +1. Create the database where the app will access the authorization. ``` -CREATE DATABASE ; -USE ; +CREATE DATABASE airbyte_linkedin_ads_db; +USE airbyte_linkedin_ads_db; ``` 2. The native app will validate the output database and create it if it does not exist. In order to do that, the app needs access to the database: @@ -65,7 +65,7 @@ As of 2023-09-13, the [Snowflake documentation](https://docs.snowflake.com/en/sq 4. Once you have external access configured, you need define your authorization/authentication. Provide the credentials to the app as such: ``` -CREATE OR REPLACE SECRET integration_linkedin_ads_oauth +CREATE OR REPLACE SECRET airbyte_app_secret TYPE = GENERIC_STRING SECRET_STRING = '{ "auth_method": "oAuth2.0", @@ -78,22 +78,22 @@ CREATE OR REPLACE SECRET integration_linkedin_ads_oauth 5. Once the network rule and the secret are defined in Snowflake, you need to make them available to the app by using an external access integration. ``` -CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION integration_linkedin_ads +CREATE OR REPLACE EXTERNAL ACCESS INTEGRATION airbyte_app_integration ALLOWED_NETWORK_RULES = (linkedin_apis_network_rule) - ALLOWED_AUTHENTICATION_SECRETS = (integration_linkedin_ads_oauth) + ALLOWED_AUTHENTICATION_SECRETS = (airbyte_app_secret) ENABLED = true; ``` 6. Grant permission for the app to access the integration. ``` -GRANT USAGE ON INTEGRATION integration_linkedin_ads TO APPLICATION AIRBYTE_LINKEDIN_ADS; +GRANT USAGE ON INTEGRATION airbyte_app_integration TO APPLICATION AIRBYTE_LINKEDIN_ADS; ``` 7. Grant permissions for the app to access the database that houses the secret and read the secret. ``` -GRANT USAGE ON DATABASE TO APPLICATION AIRBYTE_LINKEDIN_ADS; -GRANT USAGE ON SCHEMA TO APPLICATION AIRBYTE_LINKEDIN_ADS; -GRANT READ ON SECRET integration_linkedin_ads_oauth TO APPLICATION AIRBYTE_LINKEDIN_ADS; +GRANT USAGE ON DATABASE airbyte_linkedin_ads_db TO APPLICATION AIRBYTE_LINKEDIN_ADS; +GRANT USAGE ON SCHEMA public TO APPLICATION AIRBYTE_LINKEDIN_ADS; +GRANT READ ON SECRET airbyte_app_secret TO APPLICATION AIRBYTE_LINKEDIN_ADS; ``` 8. Grant permissions for the app to create a warehouse on which to execute sync tasks, and to execute tasks. 
@@ -112,18 +112,6 @@ Once you have access to the app, select `New Connection` and fill the following --- -`Secret` - -The name of the secret prefixed by which database and schema. Based on the previous steps: `..integration_linkedin_ads_oauth`. - ---- - -`External Access Integration` - -Name of the Snowflake integration where the secret and network rules are configured. Based on the previous steps: `integration_linkedin_ads`. - ---- - `start_date` UTC date in the format 2020-09-17. Any data before this date will not be replicated. diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/understanding-airbyte/basic-normalization.md index bbebf577fdd7..e51f4eb1a1ac 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/understanding-airbyte/basic-normalization.md @@ -1,5 +1,11 @@ # Basic Normalization +:::danger + +Basic normalization is being removed in favor of [Typing and Deduping](/understanding-airbyte/typing-deduping), as part of [Destinations V2](/release_notes/upgrading_to_destinations_v2). This page remains as a guide for legacy connectors. + +::: + ## High-Level Overview :::info diff --git a/docs/understanding-airbyte/connections/README.md b/docs/understanding-airbyte/connections/README.md index 49a0756a43d9..5e6c449152b7 100644 --- a/docs/understanding-airbyte/connections/README.md +++ b/docs/understanding-airbyte/connections/README.md @@ -63,19 +63,13 @@ A sync mode is therefore, a combination of a source and destination mode togethe ## Optional operations -### Airbyte basic normalization +### Typing and Deduping -As described by the [Airbyte Protocol from the Airbyte Specifications](../airbyte-protocol.md), replication is composed of source connectors that are transmitting data in a JSON format. It is then written as such by the destination connectors. - -On top of this replication, Airbyte provides the option to enable or disable an additional transformation step at the end of the sync called [basic normalization](../basic-normalization.md). This operation is: - -- Only available for destinations that support dbt execution -- Automatically generates a pipeline or DAG of dbt transformation models to convert JSON blob objects into normalized tables -- Runs and applies these dbt models to the data written in the destination +As described by the [Airbyte Protocol from the Airbyte Specifications](../airbyte-protocol.md), replication is composed of source connectors that are transmitting data in a JSON format. It is then written as such by the destination connectors. On top of this replication, Airbyte's database and data warehouse destinations can provide conversions from the raw JSON data into type-cast relational columns. Learn more [here](/understanding-airbyte/typing-deduping). :::note -Normalizing data may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is normalized and is not related to Airbyte credit usage. +Typing and Deduping may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is transformed and is not related to Airbyte credit usage. ::: diff --git a/gradle.properties b/gradle.properties index 3e934f5138bb..0307d9e24b7a 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,4 +1,4 @@ -VERSION=0.50.31 +VERSION=0.50.32 # NOTE: some of these values are overwritten in CI!
# NOTE: if you want to override this for your local machine, set overrides in ~/.gradle/gradle.properties diff --git a/run-ab-platform.sh b/run-ab-platform.sh index 3141fb38725d..240f6cbee699 100755 --- a/run-ab-platform.sh +++ b/run-ab-platform.sh @@ -1,6 +1,6 @@ #!/bin/bash -VERSION=0.50.31 +VERSION=0.50.32 # Run away from anything even a little scary set -o nounset # -u exit if a variable is not set set -o errexit # -f exit for any command failure" diff --git a/settings.gradle b/settings.gradle index 903962ab182c..c9b9aaf27b13 100644 --- a/settings.gradle +++ b/settings.gradle @@ -146,13 +146,11 @@ if (isCiServer || isAirbyteCI) { enabled = isAirbyteCI } remote(com.github.burrunan.s3cache.AwsS3BuildCache) { - region = 'us-east-2' - bucket = 'airbyte-buildcache' - prefix = 'cache/' - push = isCiServer - enabled = isCiServer && !isAirbyteCI - // Credentials will be taken from S3_BUILD_CACHE_... environment variables - // anonymous access will be used if environment variables are missing + region = 'us-west-2' // close to dagger runners + bucket = 'ab-ci-cache' + prefix = "${System.getProperty('s3BuildCachePrefix', 'connectors')}-ci-cache/" + push = isAirbyteCI + enabled = System.getenv().containsKey("S3_BUILD_CACHE_ACCESS_KEY_ID") } } }
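With this configuration, the remote Gradle build cache only turns on when the access-key environment variable is present, and the cache prefix can be overridden through the `s3BuildCachePrefix` system property. A hypothetical local invocation (the secret-key variable name follows the `S3_BUILD_CACHE_...` convention mentioned in the removed comment and is an assumption here):

```bash
# The remote cache stays disabled unless this variable is set
export S3_BUILD_CACHE_ACCESS_KEY_ID="<aws access key id>"
export S3_BUILD_CACHE_SECRET_KEY="<aws secret key>"

# Optionally override the default "connectors" cache prefix
./gradlew build -Ds3BuildCachePrefix=connectors
```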