From 07579bdaec221dfdd36c493254207b1f4a626bf6 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 16 Jan 2024 13:04:01 -0800 Subject: [PATCH 01/12] Destination Postgres: Unpin cloud from 0.4.0 (#34303) Signed-off-by: Gireesh Sreepathi --- .../connectors/destination-postgres/metadata.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte-integrations/connectors/destination-postgres/metadata.yaml b/airbyte-integrations/connectors/destination-postgres/metadata.yaml index 54f3288be39f..601a751d1cee 100644 --- a/airbyte-integrations/connectors/destination-postgres/metadata.yaml +++ b/airbyte-integrations/connectors/destination-postgres/metadata.yaml @@ -18,11 +18,9 @@ data: normalizationTag: 0.4.3 registries: cloud: - dockerImageTag: 0.4.0 dockerRepository: airbyte/destination-postgres-strict-encrypt enabled: true oss: - dockerImageTag: 0.4.0 enabled: true releaseStage: alpha supportLevel: community From db83e149dda33a72fe2c91654e6bc36f5ea3501c Mon Sep 17 00:00:00 2001 From: Joe Bell Date: Tue, 16 Jan 2024 13:23:39 -0800 Subject: [PATCH 02/12] Destination Redshift - additional check method check, fix s3 file deletion (#34186) Signed-off-by: Gireesh Sreepathi Co-authored-by: Sitaram Shelke Co-authored-by: Marcos Marx Co-authored-by: Cynthia Yin Co-authored-by: Baz Co-authored-by: bazarnov Co-authored-by: kekiss Co-authored-by: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com> Co-authored-by: Edward Gao Co-authored-by: Augustin Co-authored-by: Joe Reuter Co-authored-by: Alexandre Cuoci Co-authored-by: perangel Co-authored-by: Aaron ("AJ") Steers Co-authored-by: Ben Church Co-authored-by: Gireesh Sreepathi --- airbyte-cdk/java/airbyte-cdk/README.md | 1 + .../src/main/resources/version.properties | 2 +- .../jdbc/AbstractJdbcDestination.java | 10 ++++++ .../staging/GeneralStagingFunctions.java | 30 +++++++++++------ .../destination/staging/SerialFlush.java | 5 ++- .../staging/StagingOperations.java | 25 ++------------- .../destination/s3/BlobStorageOperations.java | 2 +- .../destination/s3/S3ConsumerFactory.java | 1 - .../destination/s3/S3StorageOperations.java | 1 - .../destination/staging/AsyncFlush.java | 6 ++-- .../staging/StagingConsumerFactory.java | 10 ------ .../destination-redshift/build.gradle | 3 +- .../destination-redshift/metadata.yaml | 2 +- .../redshift/RedshiftInsertDestination.java | 6 ++++ .../RedshiftStagingS3Destination.java | 4 ++- .../RedshiftS3StagingSqlOperations.java | 32 ++----------------- .../redshift/util/RedshiftUtil.java | 8 +++++ docs/integrations/destinations/redshift.md | 25 ++++++++++++--- 18 files changed, 82 insertions(+), 91 deletions(-) diff --git a/airbyte-cdk/java/airbyte-cdk/README.md b/airbyte-cdk/java/airbyte-cdk/README.md index 5a45cbb2c302..d572437df71c 100644 --- a/airbyte-cdk/java/airbyte-cdk/README.md +++ b/airbyte-cdk/java/airbyte-cdk/README.md @@ -166,6 +166,7 @@ MavenLocal debugging steps: | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.12.1 | 2024-01-11 | [\#34186](https://github.com/airbytehq/airbyte/pull/34186) | Add hook for additional destination specific checks to JDBC destination check method | | 0.12.0 | 2024-01-10 | [\#33875](https://github.com/airbytehq/airbyte/pull/33875) | Upgrade sshd-mina to 2.11.1 | | 0.11.5 | 2024-01-10 | 
[\#34119](https://github.com/airbytehq/airbyte/pull/34119) | Remove wal2json support for postgres+debezium. |
| 0.11.4 | 2024-01-09 | [\#33305](https://github.com/airbytehq/airbyte/pull/33305) | Source stats in incremental syncs |

diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties
index 753eb4c8def5..db02062e2991 100644
--- a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties
+++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties
@@ -1 +1 @@
-version=0.12.0
+version=0.12.1

diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java
index 49572d746bbe..ff93320b19bd 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java
@@ -92,6 +92,7 @@ public AirbyteConnectionStatus check(final JsonNode config) {
         final var v2RawSchema = namingResolver.getIdentifier(TypingAndDedupingFlag.getRawNamespaceOverride(RAW_SCHEMA_OVERRIDE)
             .orElse(JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE));
         attemptTableOperations(v2RawSchema, database, namingResolver, sqlOperations, false);
+        destinationSpecificTableOperations(database);
       }
       return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED);
     } catch (final ConnectionErrorException ex) {
@@ -114,6 +115,15 @@ public AirbyteConnectionStatus check(final JsonNode config) {
     }
   }

+  /**
+   * Specific databases may have additional checks unique to them; override this method to add
+   * those checks.
+   *
+   * @param database the database to run checks against
+   * @throws Exception if a destination-specific check fails
+   */
+  protected void destinationSpecificTableOperations(final JdbcDatabase database) throws Exception {}
+
   /**
    * This method is deprecated. It verifies table creation, but not insert rights to a newly created
    * table. Use attemptTableOperations with the attemptInsert argument instead.
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.java
index e28e9ec1d8f3..b01962a17bc7 100644
--- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.java
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/GeneralStagingFunctions.java
@@ -13,6 +13,7 @@
 import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.UUID;
 import java.util.concurrent.locks.Lock;
 import lombok.extern.slf4j.Slf4j;

@@ -22,6 +23,16 @@
 @Slf4j
 public class GeneralStagingFunctions {

+  // using a random string here as a placeholder for the moment.
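+  // (The "random string" is in fact a UUID, generated once per process when this class loads.)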
+ // This would avoid mixing data in the staging area between different syncs (especially if they + // manipulate streams with similar names) + // if we replaced the random connection id by the actual connection_id, we'd gain the opportunity to + // leverage data that was uploaded to stage + // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead. + // This would also allow other programs/scripts + // to load (or reload backups?) in the connection's staging area to be loaded at the next sync. + public static final UUID RANDOM_CONNECTION_ID = UUID.randomUUID(); + public static OnStartFunction onStartFunction(final JdbcDatabase database, final StagingOperations stagingOperations, final List writeConfigs, @@ -34,7 +45,6 @@ public static OnStartFunction onStartFunction(final JdbcDatabase database, final String schema = writeConfig.getOutputSchemaName(); final String stream = writeConfig.getStreamName(); final String dstTableName = writeConfig.getOutputTableName(); - final String stageName = stagingOperations.getStageName(schema, dstTableName); final String stagingPath = stagingOperations.getStagingPath(SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schema, stream, writeConfig.getOutputTableName(), writeConfig.getWriteDatetime()); @@ -44,7 +54,7 @@ public static OnStartFunction onStartFunction(final JdbcDatabase database, stagingOperations.createSchemaIfNotExists(database, schema); stagingOperations.createTableIfNotExists(database, schema, dstTableName); - stagingOperations.createStageIfNotExists(database, stageName); + stagingOperations.createStageIfNotExists(); /* * When we're in OVERWRITE, clear out the table at the start of a sync, this is an expected side @@ -68,7 +78,6 @@ public static OnStartFunction onStartFunction(final JdbcDatabase database, * upload was unsuccessful */ public static void copyIntoTableFromStage(final JdbcDatabase database, - final String stageName, final String stagingPath, final List stagedFiles, final String tableName, @@ -83,7 +92,7 @@ public static void copyIntoTableFromStage(final JdbcDatabase database, final Lock rawTableInsertLock = typerDeduper.getRawTableInsertLock(streamNamespace, streamName); rawTableInsertLock.lock(); try { - stagingOperations.copyIntoTableFromStage(database, stageName, stagingPath, stagedFiles, + stagingOperations.copyIntoTableFromStage(database, stagingPath, stagedFiles, tableName, schemaName); } finally { rawTableInsertLock.unlock(); @@ -96,8 +105,6 @@ public static void copyIntoTableFromStage(final JdbcDatabase database, typerDeduperValve.updateTimeAndIncreaseInterval(streamId); } } catch (final Exception e) { - stagingOperations.cleanUpStage(database, stageName, stagedFiles); - log.info("Cleaning stage path {}", stagingPath); throw new RuntimeException("Failed to upload data from stage " + stagingPath, e); } } @@ -124,10 +131,15 @@ public static OnCloseFunction onCloseFunction(final JdbcDatabase database, for (final WriteConfig writeConfig : writeConfigs) { final String schemaName = writeConfig.getOutputSchemaName(); if (purgeStagingData) { - final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName()); + final String stagePath = stagingOperations.getStagingPath( + RANDOM_CONNECTION_ID, + schemaName, + writeConfig.getStreamName(), + writeConfig.getOutputTableName(), + writeConfig.getWriteDatetime()); log.info("Cleaning stage in destination started for stream {}. 
schema {}, stage: {}", writeConfig.getStreamName(), schemaName, - stageName); - stagingOperations.dropStageIfExists(database, stageName); + stagePath); + stagingOperations.dropStageIfExists(database, stagePath); } } typerDeduper.commitFinalTables(); diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialFlush.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialFlush.java index 767eea233364..a4cb0c5fdaf3 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialFlush.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/SerialFlush.java @@ -81,15 +81,14 @@ public static FlushBufferFunction function( final WriteConfig writeConfig = pairToWriteConfig.get(pair); final String schemaName = writeConfig.getOutputSchemaName(); - final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getOutputTableName()); final String stagingPath = stagingOperations.getStagingPath( SerialStagingConsumerFactory.RANDOM_CONNECTION_ID, schemaName, writeConfig.getStreamName(), writeConfig.getOutputTableName(), writeConfig.getWriteDatetime()); try (writer) { writer.flush(); - final String stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stageName, stagingPath); - GeneralStagingFunctions.copyIntoTableFromStage(database, stageName, stagingPath, List.of(stagedFile), writeConfig.getOutputTableName(), + final String stagedFile = stagingOperations.uploadRecordsToStage(database, writer, schemaName, stagingPath); + GeneralStagingFunctions.copyIntoTableFromStage(database, stagingPath, List.of(stagedFile), writeConfig.getOutputTableName(), schemaName, stagingOperations, writeConfig.getNamespace(), diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/StagingOperations.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/StagingOperations.java index fc04e995fb47..aac9351b4b7d 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/StagingOperations.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/StagingOperations.java @@ -18,15 +18,6 @@ */ public interface StagingOperations extends SqlOperations { - /** - * Returns the staging environment's name - * - * @param namespace Name of schema - * @param streamName Name of the stream - * @return Fully qualified name of the staging environment - */ - String getStageName(String namespace, String streamName); - /** * @param outputTableName The name of the table this staging file will be loaded into (typically a * raw table). Not all destinations use the table name in the staging path (e.g. Snowflake @@ -37,7 +28,7 @@ public interface StagingOperations extends SqlOperations { /** * Create a staging folder where to upload temporary files before loading into the final destination */ - void createStageIfNotExists(JdbcDatabase database, String stageName) throws Exception; + void createStageIfNotExists() throws Exception; /** * Upload the data file into the stage area. 
   *
   * @param database database used for syncing
   * @param recordsData records stored in the in-memory buffer
   * @param schemaName name of schema
-  * @param stageName name of the staging area folder
   * @param stagingPath path of the staging folder for data files
   * @return the name of the file that was uploaded.
   */
-  String uploadRecordsToStage(JdbcDatabase database, SerializableBuffer recordsData, String schemaName, String stageName, String stagingPath)
+  String uploadRecordsToStage(JdbcDatabase database, SerializableBuffer recordsData, String schemaName, String stagingPath)
       throws Exception;

   /**
    * Load the data stored in the stage area into a temporary table in the destination
    *
    * @param database database interface
-   * @param stageName name of staging area folder
    * @param stagingPath path to staging files
    * @param stagedFiles collection of staged files
    * @param tableName name of table to write staging files to
    * @param schemaName name of schema
    */
   void copyIntoTableFromStage(JdbcDatabase database,
-                              String stageName,
                               String stagingPath,
                               List<String> stagedFiles,
                               String tableName,
                               String schemaName)
       throws Exception;

-  /**
-   * Remove files that were just staged
-   *
-   * @param database database used for syncing
-   * @param stageName name of staging area folder
-   * @param stagedFiles collection of the staging files to remove
-   */
-  void cleanUpStage(JdbcDatabase database, String stageName, List<String> stagedFiles) throws Exception;

   /**
    * Delete the stage area and all staged files that were in it
    *
diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/BlobStorageOperations.java b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/BlobStorageOperations.java
index dfb0d0a50822..9df281e9e19b 100644
--- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/BlobStorageOperations.java
+++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/BlobStorageOperations.java
@@ -31,7 +31,7 @@ protected BlobStorageOperations() {
    *
    * @return the name of the file that was uploaded.
*/ - public abstract String uploadRecordsToBucket(SerializableBuffer recordsData, String namespace, String streamName, String objectPath) + public abstract String uploadRecordsToBucket(SerializableBuffer recordsData, String namespace, String objectPath) throws Exception; /** diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.java b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.java index ff0207cabdb8..38068dbf38c1 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.java +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3ConsumerFactory.java @@ -128,7 +128,6 @@ private FlushBufferFunction flushBufferFunction(final BlobStorageOperations stor writeConfig.addStoredFile(storageOperations.uploadRecordsToBucket( writer, writeConfig.getNamespace(), - writeConfig.getStreamName(), writeConfig.getFullOutputPath())); } catch (final Exception e) { LOGGER.error("Failed to flush and upload buffer to storage:", e); diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3StorageOperations.java b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3StorageOperations.java index 038a06bff987..9db0d0d4994a 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3StorageOperations.java +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/s3/S3StorageOperations.java @@ -120,7 +120,6 @@ protected boolean doesBucketExist(final String bucket) { @Override public String uploadRecordsToBucket(final SerializableBuffer recordsData, final String namespace, - final String streamName, final String objectPath) { final List exceptionsThrown = new ArrayList<>(); while (exceptionsThrown.size() < UPLOAD_RETRY_LIMIT) { diff --git a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.java b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.java index d3adf4ff43ce..564e3d3ade85 100644 --- a/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.java +++ b/airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/java/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.java @@ -103,19 +103,17 @@ public void flush(final StreamDescriptor decs, final Stream outputRecordCollector, final JdbcDatabase database, diff --git a/airbyte-integrations/connectors/destination-redshift/build.gradle b/airbyte-integrations/connectors/destination-redshift/build.gradle index 773a75a9ba12..33f09966a63d 100644 --- a/airbyte-integrations/connectors/destination-redshift/build.gradle +++ b/airbyte-integrations/connectors/destination-redshift/build.gradle @@ -4,12 +4,11 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.12.0' + cdkVersionRequired = '0.12.1' features = ['db-destinations', 's3-destinations', 'typing-deduping'] useLocalCdk = false } -//remove once upgrading the CDK version to 0.4.x or later java { compileJava { options.compilerArgs.remove("-Werror") diff --git 
a/airbyte-integrations/connectors/destination-redshift/metadata.yaml b/airbyte-integrations/connectors/destination-redshift/metadata.yaml index 9531fc7f51b1..93f27e28ecd8 100644 --- a/airbyte-integrations/connectors/destination-redshift/metadata.yaml +++ b/airbyte-integrations/connectors/destination-redshift/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: database connectorType: destination definitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc - dockerImageTag: 0.7.14 + dockerImageTag: 0.7.15 dockerRepository: airbyte/destination-redshift documentationUrl: https://docs.airbyte.com/integrations/destinations/redshift githubIssueLabel: destination-redshift diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestination.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestination.java index 66e5e544093f..a4ba7a669557 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestination.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestination.java @@ -22,6 +22,7 @@ import io.airbyte.integrations.destination.redshift.operations.RedshiftSqlOperations; import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftDestinationHandler; import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftSqlGenerator; +import io.airbyte.integrations.destination.redshift.util.RedshiftUtil; import java.time.Duration; import java.util.HashMap; import java.util.Map; @@ -60,6 +61,11 @@ public DataSource getDataSource(final JsonNode config) { Duration.ofMinutes(2)); } + @Override + protected void destinationSpecificTableOperations(final JdbcDatabase database) throws Exception { + RedshiftUtil.checkSvvTableAccess(database); + } + @Override public JdbcDatabase getDatabase(final DataSource dataSource) { return new DefaultJdbcDatabase(dataSource); diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java index d82a22fe2eaa..e94118e279ee 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java @@ -51,6 +51,7 @@ import io.airbyte.integrations.destination.redshift.operations.RedshiftSqlOperations; import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftDestinationHandler; import io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftSqlGenerator; +import io.airbyte.integrations.destination.redshift.util.RedshiftUtil; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; import io.airbyte.protocol.models.v0.AirbyteMessage; @@ -103,7 +104,8 @@ public AirbyteConnectionStatus check(final JsonNode config) { try { final JdbcDatabase database = new DefaultJdbcDatabase(dataSource); final String outputSchema = 
super.getNamingResolver().getIdentifier(config.get(JdbcUtils.SCHEMA_KEY).asText()); - attemptSQLCreateAndDropTableOperations(outputSchema, database, nameTransformer, redshiftS3StagingSqlOperations); + attemptTableOperations(outputSchema, database, nameTransformer, redshiftS3StagingSqlOperations, false); + RedshiftUtil.checkSvvTableAccess(database); return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); } catch (final ConnectionErrorException e) { final String message = getErrorMessage(e.getStateCode(), e.getErrorCode(), e.getExceptionMessage(), e); diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/operations/RedshiftS3StagingSqlOperations.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/operations/RedshiftS3StagingSqlOperations.java index b3adb95f8b35..9cf38f7ce4af 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/operations/RedshiftS3StagingSqlOperations.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/operations/RedshiftS3StagingSqlOperations.java @@ -52,19 +52,6 @@ public RedshiftS3StagingSqlOperations(final NamingConventionTransformer nameTran } } - /** - * I suspect this value is ignored. The stage name is eventually passed into - * {@link io.airbyte.cdk.integrations.destination.s3.S3StorageOperations#uploadRecordsToBucket(SerializableBuffer, String, String, String)} - * as the streamName parameter... which is completely ignored. - * - */ - @Override - public String getStageName(final String namespace, final String streamName) { - return nameTransformer.applyDefaultCase(String.join("_", - nameTransformer.convertStreamName(namespace), - nameTransformer.convertStreamName(streamName))); - } - @Override public String getStagingPath(final UUID connectionId, final String namespace, @@ -84,9 +71,7 @@ public String getStagingPath(final UUID connectionId, } @Override - public void createStageIfNotExists(final JdbcDatabase database, final String stageName) throws Exception { - final String bucketPath = s3Config.getBucketPath(); - final String prefix = bucketPath.isEmpty() ? "" : bucketPath + (bucketPath.endsWith("/") ? 
"" : "/"); + public void createStageIfNotExists() throws Exception { s3StorageOperations.createBucketIfNotExists(); } @@ -94,10 +79,9 @@ public void createStageIfNotExists(final JdbcDatabase database, final String sta public String uploadRecordsToStage(final JdbcDatabase database, final SerializableBuffer recordsData, final String schemaName, - final String stageName, final String stagingPath) throws Exception { - return s3StorageOperations.uploadRecordsToBucket(recordsData, schemaName, stageName, stagingPath); + return s3StorageOperations.uploadRecordsToBucket(recordsData, schemaName, stagingPath); } private String putManifest(final String manifestContents, final String stagingPath) { @@ -108,7 +92,6 @@ private String putManifest(final String manifestContents, final String stagingPa @Override public void copyIntoTableFromStage(final JdbcDatabase database, - final String stageName, final String stagingPath, final List stagedFiles, final String tableName, @@ -176,18 +159,9 @@ private static String getManifestPath(final String s3BucketName, final String s3 return "s3://" + s3BucketName + "/" + stagingPath + s3StagingFile; } - @Override - public void cleanUpStage(final JdbcDatabase database, final String stageName, final List stagedFiles) throws Exception { - final String bucketPath = s3Config.getBucketPath(); - final String prefix = bucketPath.isEmpty() ? "" : bucketPath + (bucketPath.endsWith("/") ? "" : "/"); - s3StorageOperations.cleanUpBucketObject(prefix + stageName, stagedFiles); - } - @Override public void dropStageIfExists(final JdbcDatabase database, final String stageName) throws Exception { - final String bucketPath = s3Config.getBucketPath(); - final String prefix = bucketPath.isEmpty() ? "" : bucketPath + (bucketPath.endsWith("/") ? "" : "/"); - s3StorageOperations.dropBucketObject(prefix + stageName); + s3StorageOperations.dropBucketObject(stageName); } } diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtil.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtil.java index c1433c4aa226..6551820a4831 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtil.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtil.java @@ -7,10 +7,13 @@ import static io.airbyte.integrations.destination.redshift.constants.RedshiftDestinationConstants.UPLOADING_METHOD; import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import lombok.extern.log4j.Log4j2; /** * Helper class for Destination Redshift connector. */ +@Log4j2 public class RedshiftUtil { private RedshiftUtil() {} @@ -36,4 +39,9 @@ private static boolean isNullOrEmpty(final JsonNode jsonNode) { return null == jsonNode || "".equals(jsonNode.asText()); } + public static void checkSvvTableAccess(final JdbcDatabase database) throws Exception { + log.info("checking SVV_TABLE_INFO permissions"); + database.queryJsons("SELECT 1 FROM SVV_TABLE_INFO LIMIT 1;"); + } + } diff --git a/docs/integrations/destinations/redshift.md b/docs/integrations/destinations/redshift.md index 63553b1d06d8..18f338d83861 100644 --- a/docs/integrations/destinations/redshift.md +++ b/docs/integrations/destinations/redshift.md @@ -94,12 +94,26 @@ connection only. 
   S3 is secured through public HTTPS access only.
 5. (Optional)
    [Create](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) a
    staging S3 bucket \(for the COPY strategy\).
-6. Create a user with at least create table permissions for the schema. If the schema does not exist
-   you need to add permissions for that, too. Something like this:
-```
-GRANT CREATE ON DATABASE database_name TO airflow_user; -- add create schema permission
-GRANT usage, create on schema my_schema TO airflow_user; -- add create table permission
+
+### Permissions in Redshift
+Airbyte writes data into two schemas: the schema you want your data to land in (e.g. `my_schema`),
+and a "Raw Data" schema that Airbyte uses to improve ELT reliability. By default, this raw data schema
+is `airbyte_internal`, but it can be overridden in the Redshift destination's advanced settings.
+Airbyte also needs to query Redshift's
+[SVV_TABLE_INFO](https://docs.aws.amazon.com/redshift/latest/dg/r_SVV_TABLE_INFO.html) table for
+metadata about the tables Airbyte manages; the connection check runs `SELECT 1 FROM SVV_TABLE_INFO LIMIT 1;` to verify this access.
+
+To ensure the `airbyte_user` has the correct permissions to:
+- create schemas in your database
+- use and create tables in any existing schemas you want Airbyte to write to
+- select from the `svv_table_info` system table
+
+you can execute the following SQL statements:
+
+```sql
+GRANT CREATE ON DATABASE database_name TO airbyte_user; -- add create schema permission
+GRANT usage, create on schema my_schema TO airbyte_user; -- add create table permission
+GRANT SELECT ON TABLE SVV_TABLE_INFO TO airbyte_user; -- add select permission for svv_table_info
 ```

 ### Optional Use of SSH Bastion Host
@@ -215,6 +229,7 @@ Each stream will be output into its own raw table in Redshift. Each table will c

 | Version | Date       | Pull Request                                               | Subject                                                                                                             |
 |:--------|:-----------|:-----------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------|
+| 0.7.15 | 2024-01-11 | [\#34186](https://github.com/airbytehq/airbyte/pull/34186) | Add an SVV_TABLE_INFO permission check to the check method; fix a bug where S3 staging files were not deleted.
| | 0.7.14 | 2024-01-08 | [\#34014](https://github.com/airbytehq/airbyte/pull/34014) | Update order of options in spec | | 0.7.13 | 2024-01-05 | [\#33948](https://github.com/airbytehq/airbyte/pull/33948) | Fix NPE when prepare tables fail; Add case sensitive session for super; Bastion heartbeats added | | 0.7.12 | 2024-01-03 | [\#33924](https://github.com/airbytehq/airbyte/pull/33924) | Add new ap-southeast-3 AWS region | From b03d785c41a197c276e034767117ac79f68a8079 Mon Sep 17 00:00:00 2001 From: Augustin Date: Tue, 16 Jan 2024 23:15:49 +0100 Subject: [PATCH 03/12] airbyte-ci: pass extra options after gradle tasks (#34301) --- airbyte-ci/connectors/pipelines/README.md | 1 + .../pipelines/airbyte_ci/steps/gradle.py | 22 ++++++++----------- .../connectors/pipelines/pyproject.toml | 2 +- .../connectors/pipelines/tests/test_gradle.py | 12 +--------- 4 files changed, 12 insertions(+), 25 deletions(-) diff --git a/airbyte-ci/connectors/pipelines/README.md b/airbyte-ci/connectors/pipelines/README.md index ede98ed3dd51..4c8a85289eaa 100644 --- a/airbyte-ci/connectors/pipelines/README.md +++ b/airbyte-ci/connectors/pipelines/README.md @@ -543,6 +543,7 @@ E.G.: running `pytest` on a specific test folder: | Version | PR | Description | | ------- | ---------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| 3.4.2 | [#34301](https://github.com/airbytehq/airbyte/pull/34301) | Pass extra params after Gradle tasks. | | 3.4.1 | [#34067](https://github.com/airbytehq/airbyte/pull/34067) | Use dagster-cloud 1.5.7 for deploy | | 3.4.0 | [#34276](https://github.com/airbytehq/airbyte/pull/34276) | Introduce `--only-step` option for connector tests. | | 3.3.0 | [#34218](https://github.com/airbytehq/airbyte/pull/34218) | Introduce `--ci-requirements` option for client defined CI runners. | diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py index ae44de953449..08b110dabc0a 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/steps/gradle.py @@ -3,7 +3,7 @@ # from abc import ABC -from typing import Any, ClassVar, List +from typing import Any, ClassVar, List, Optional, Tuple import pipelines.dagger.actions.system.docker from dagger import CacheSharingMode, CacheVolume @@ -11,7 +11,7 @@ from pipelines.consts import AMAZONCORRETTO_IMAGE from pipelines.dagger.actions import secrets from pipelines.helpers.utils import sh_dash_c -from pipelines.models.steps import STEP_PARAMS, Step, StepResult +from pipelines.models.steps import Step, StepResult class GradleTask(Step, ABC): @@ -30,20 +30,15 @@ class GradleTask(Step, ABC): LOCAL_MAVEN_REPOSITORY_PATH = "/root/.m2" GRADLE_DEP_CACHE_PATH = "/root/gradle-cache" GRADLE_HOME_PATH = "/root/.gradle" - STATIC_GRADLE_TASK_OPTIONS = ("--no-daemon", "--no-watch-fs") + STATIC_GRADLE_OPTIONS = ("--no-daemon", "--no-watch-fs", "--build-cache", "--scan", "--console=plain") gradle_task_name: ClassVar[str] bind_to_docker_host: ClassVar[bool] = False mount_connector_secrets: ClassVar[bool] = False accept_extra_params = True @property - def default_params(self) -> STEP_PARAMS: - return super().default_params | { - "-Ds3BuildCachePrefix": [self.context.connector.technical_name], # Set the S3 build cache prefix. - "--build-cache": [], # Enable the gradle build cache. 
- "--scan": [], # Enable the gradle build scan. - "--console": ["plain"], # Disable the gradle rich console. - } + def gradle_task_options(self) -> Tuple[str, ...]: + return self.STATIC_GRADLE_OPTIONS + (f"-Ds3BuildCachePrefix={self.context.connector.technical_name}",) @property def dependency_cache_volume(self) -> CacheVolume: @@ -64,8 +59,9 @@ def build_include(self) -> List[str]: for dependency_directory in self.context.connector.get_local_dependency_paths(with_test_dependencies=True) ] - def _get_gradle_command(self, task: str, *args: Any) -> str: - return f"./gradlew {' '.join(self.STATIC_GRADLE_TASK_OPTIONS + args)} {task}" + def _get_gradle_command(self, task: str, *args: Any, task_options: Optional[List[str]] = None) -> str: + task_options = task_options or [] + return f"./gradlew {' '.join(self.gradle_task_options + args)} {task} {' '.join(task_options)}" async def _run(self, *args: Any, **kwargs: Any) -> StepResult: include = [ @@ -200,7 +196,7 @@ async def _run(self, *args: Any, **kwargs: Any) -> StepResult: # Warm the gradle cache. f"(rsync -a --stats --mkpath {self.GRADLE_DEP_CACHE_PATH}/ {self.GRADLE_HOME_PATH} || true)", # Run the gradle task. - self._get_gradle_command(connector_task, *self.params_as_cli_options), + self._get_gradle_command(connector_task, task_options=self.params_as_cli_options), ] ) ) diff --git a/airbyte-ci/connectors/pipelines/pyproject.toml b/airbyte-ci/connectors/pipelines/pyproject.toml index ab036fe10fa4..79f37a9efd60 100644 --- a/airbyte-ci/connectors/pipelines/pyproject.toml +++ b/airbyte-ci/connectors/pipelines/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pipelines" -version = "3.4.1" +version = "3.4.2" description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines" authors = ["Airbyte "] diff --git a/airbyte-ci/connectors/pipelines/tests/test_gradle.py b/airbyte-ci/connectors/pipelines/tests/test_gradle.py index 5e867c3582ba..34312ec1d0d3 100644 --- a/airbyte-ci/connectors/pipelines/tests/test_gradle.py +++ b/airbyte-ci/connectors/pipelines/tests/test_gradle.py @@ -39,18 +39,8 @@ async def test_build_include(self, test_context): def test_params(self, test_context): step = self.DummyStep(test_context) + step.extra_params = {"-x": ["dummyTask", "dummyTask2"]} assert set(step.params_as_cli_options) == { - f"-Ds3BuildCachePrefix={test_context.connector.technical_name}", - "--build-cache", - "--scan", - "--console=plain", - } - step.extra_params = {"-x": ["dummyTask", "dummyTask2"], "--console": ["rich"]} - assert set(step.params_as_cli_options) == { - f"-Ds3BuildCachePrefix={test_context.connector.technical_name}", - "--build-cache", - "--scan", - "--console=rich", "-x=dummyTask", "-x=dummyTask2", } From cbbbeb92b83d3ef2836a46cfd10bf7e4109a7c90 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Tue, 16 Jan 2024 16:46:51 -0800 Subject: [PATCH 04/12] AirbyteLib: Python lint cleanup (#34223) --- airbyte-lib/airbyte_lib/__init__.py | 2 + airbyte-lib/airbyte_lib/_executor.py | 53 +++-- .../airbyte_lib/_factories/cache_factories.py | 5 +- .../_factories/connector_factories.py | 27 ++- airbyte-lib/airbyte_lib/_file_writers/base.py | 3 +- .../airbyte_lib/_file_writers/parquet.py | 8 +- airbyte-lib/airbyte_lib/_processors.py | 8 +- .../airbyte_lib/_util/protocol_util.py | 8 +- airbyte-lib/airbyte_lib/caches/base.py | 32 +-- airbyte-lib/airbyte_lib/caches/duckdb.py | 7 +- airbyte-lib/airbyte_lib/caches/postgres.py | 3 +- 
airbyte-lib/airbyte_lib/datasets/_lazy.py | 8 +- airbyte-lib/airbyte_lib/registry.py | 9 +- airbyte-lib/airbyte_lib/source.py | 136 ++++++------ airbyte-lib/airbyte_lib/telemetry.py | 12 +- airbyte-lib/airbyte_lib/types.py | 1 + airbyte-lib/airbyte_lib/validate.py | 22 +- airbyte-lib/docs.py | 12 +- airbyte-lib/docs/generated/airbyte_lib.html | 36 ++-- .../docs/generated/airbyte_lib/factories.html | 7 - airbyte-lib/poetry.lock | 194 +++++++++++++++--- airbyte-lib/pyproject.toml | 103 ++++++++-- airbyte-lib/tests/lint_tests/test_ruff.py | 9 - 23 files changed, 468 insertions(+), 237 deletions(-) delete mode 100644 airbyte-lib/docs/generated/airbyte_lib/factories.html diff --git a/airbyte-lib/airbyte_lib/__init__.py b/airbyte-lib/airbyte_lib/__init__.py index 8ba1300c6973..895849f19771 100644 --- a/airbyte-lib/airbyte_lib/__init__.py +++ b/airbyte-lib/airbyte_lib/__init__.py @@ -1,3 +1,5 @@ +"""AirbyteLib brings Airbyte ELT to every Python developer.""" + from airbyte_lib._factories.cache_factories import get_default_cache, new_local_cache from airbyte_lib._factories.connector_factories import get_connector from airbyte_lib.datasets import CachedDataset diff --git a/airbyte-lib/airbyte_lib/_executor.py b/airbyte-lib/airbyte_lib/_executor.py index 6d0c6625bb18..1a816cc46848 100644 --- a/airbyte-lib/airbyte_lib/_executor.py +++ b/airbyte-lib/airbyte_lib/_executor.py @@ -1,18 +1,22 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. +from __future__ import annotations -import os import subprocess import sys from abc import ABC, abstractmethod -from collections.abc import Generator, Iterable, Iterator from contextlib import contextmanager from pathlib import Path -from typing import IO, Any, NoReturn +from typing import IO, TYPE_CHECKING, Any, NoReturn -from airbyte_lib.registry import ConnectorMetadata from airbyte_lib.telemetry import SourceTelemetryInfo, SourceType +if TYPE_CHECKING: + from collections.abc import Generator, Iterable, Iterator + + from airbyte_lib.registry import ConnectorMetadata + + _LATEST_VERSION = "latest" @@ -89,7 +93,7 @@ def _stream_from_file(file: IO[str]) -> Generator[str, Any, None]: exit_code = process.wait() # If the exit code is not 0 or -15 (SIGTERM), raise an exception - if exit_code != 0 and exit_code != -15: + if exit_code not in (0, -15): raise Exception(f"Process exited with code {exit_code}") @@ -98,8 +102,9 @@ def __init__( self, metadata: ConnectorMetadata, target_version: str | None = None, - install_if_missing: bool = False, pip_url: str | None = None, + *, + install_if_missing: bool = False, ) -> None: super().__init__(metadata, target_version) self.install_if_missing = install_if_missing @@ -122,26 +127,28 @@ def _run_subprocess_and_raise_on_failure(self, args: list[str]) -> None: def uninstall(self) -> None: venv_name = self._get_venv_name() - if os.path.exists(venv_name): + if Path(venv_name).exists(): self._run_subprocess_and_raise_on_failure(["rm", "-rf", venv_name]) def install(self) -> None: venv_name = self._get_venv_name() self._run_subprocess_and_raise_on_failure([sys.executable, "-m", "venv", venv_name]) - pip_path = os.path.join(venv_name, "bin", "pip") + pip_path = str(Path(venv_name) / "bin" / "pip") self._run_subprocess_and_raise_on_failure([pip_path, "install", "-e", self.pip_url]) def _get_installed_version(self) -> str: - """ - In the venv, run the following: python -c "from importlib.metadata import version; print(version(''))" + """Detect the version of the connector installed. 
+ + In the venv, we run the following: + > python -c "from importlib.metadata import version; print(version(''))" """ venv_name = self._get_venv_name() connector_name = self.metadata.name return subprocess.check_output( [ - os.path.join(venv_name, "bin", "python"), + Path(venv_name) / "bin" / "python", "-c", f"from importlib.metadata import version; print(version('{connector_name}'))", ], @@ -151,8 +158,8 @@ def _get_installed_version(self) -> str: def ensure_installation( self, ) -> None: - """ - Ensure that the connector is installed in a virtual environment. + """Ensure that the connector is installed in a virtual environment. + If not yet installed and if install_if_missing is True, then install. Optionally, verify that the installed version matches the target version. @@ -165,14 +172,16 @@ def ensure_installation( if not venv_path.exists(): if not self.install_if_missing: raise Exception( - f"Connector {self.metadata.name} is not available - venv {venv_name} does not exist" + f"Connector {self.metadata.name} is not available - " + f"venv {venv_name} does not exist" ) self.install() connector_path = self._get_connector_path() if not connector_path.exists(): raise FileNotFoundError( - f"Could not find connector '{self.metadata.name}' in venv '{venv_name}' with connector path '{connector_path}'.", + f"Could not find connector '{self.metadata.name}' in venv '{venv_name}' with " + f"connector path '{connector_path}'.", ) if self.enforce_version: @@ -185,13 +194,14 @@ def ensure_installation( version_after_install = self._get_installed_version() if version_after_install != self.target_version: raise Exception( - f"Failed to install connector {self.metadata.name} version {self.target_version}. Installed version is {version_after_install}", + f"Failed to install connector {self.metadata.name} version " + f"{self.target_version}. Installed version is {version_after_install}", ) def execute(self, args: list[str]) -> Iterator[str]: connector_path = self._get_connector_path() - with _stream_from_subprocess([str(connector_path)] + args) as stream: + with _stream_from_subprocess([str(connector_path), *args]) as stream: yield from stream def get_telemetry_info(self) -> SourceTelemetryInfo: @@ -204,19 +214,20 @@ def ensure_installation(self) -> None: self.execute(["spec"]) except Exception as e: raise Exception( - f"Connector {self.metadata.name} is not available - executing it failed: {e}" - ) + f"Connector {self.metadata.name} is not available - executing it failed" + ) from e def install(self) -> NoReturn: raise Exception(f"Connector {self.metadata.name} is not available - cannot install it") def uninstall(self) -> NoReturn: raise Exception( - f"Connector {self.metadata.name} is installed manually and not managed by airbyte-lib - please remove it manually" + f"Connector {self.metadata.name} is installed manually and not managed by airbyte-lib -" + " please remove it manually" ) def execute(self, args: list[str]) -> Iterator[str]: - with _stream_from_subprocess([self.metadata.name] + args) as stream: + with _stream_from_subprocess([self.metadata.name, *args]) as stream: yield from stream def get_telemetry_info(self) -> SourceTelemetryInfo: diff --git a/airbyte-lib/airbyte_lib/_factories/cache_factories.py b/airbyte-lib/airbyte_lib/_factories/cache_factories.py index ea863b7bdb00..5a95dce2db7b 100644 --- a/airbyte-lib/airbyte_lib/_factories/cache_factories.py +++ b/airbyte-lib/airbyte_lib/_factories/cache_factories.py @@ -1,5 +1,5 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- +from __future__ import annotations from pathlib import Path @@ -23,13 +23,14 @@ def get_default_cache() -> DuckDBCache: def new_local_cache( cache_name: str | None = None, cache_dir: str | Path | None = None, + *, cleanup: bool = True, ) -> DuckDBCache: """Get a local cache for storing data, using a name string to seed the path. Args: cache_name: Name to use for the cache. Defaults to None. - root_dir: Root directory to store the cache in. Defaults to None. + cache_dir: Root directory to store the cache in. Defaults to None. cleanup: Whether to clean up temporary files. Defaults to True. Cache files are stored in the `.cache` directory, relative to the current diff --git a/airbyte-lib/airbyte_lib/_factories/connector_factories.py b/airbyte-lib/airbyte_lib/_factories/connector_factories.py index 06482d67aa1c..347710f20824 100644 --- a/airbyte-lib/airbyte_lib/_factories/connector_factories.py +++ b/airbyte-lib/airbyte_lib/_factories/connector_factories.py @@ -1,5 +1,5 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. - +from __future__ import annotations from typing import Any @@ -13,17 +13,26 @@ def get_connector( version: str | None = None, pip_url: str | None = None, config: dict[str, Any] | None = None, + *, use_local_install: bool = False, install_if_missing: bool = True, ) -> Source: - """ - Get a connector by name and version. - :param name: connector name - :param version: connector version - if not provided, the currently installed version will be used. If no version is installed, the latest available version will be used. The version can also be set to "latest" to force the use of the latest available version. - :param pip_url: connector pip URL - if not provided, the pip url will be inferred from the connector name. - :param config: connector config - if not provided, you need to set it later via the set_config method. - :param use_local_install: whether to use a virtual environment to run the connector. If True, the connector is expected to be available on the path (e.g. installed via pip). If False, the connector will be installed automatically in a virtual environment. - :param install_if_missing: whether to install the connector if it is not available locally. This parameter is ignored if use_local_install is True. + """Get a connector by name and version. + + Args: + name: connector name + version: connector version - if not provided, the currently installed version will be used. + If no version is installed, the latest available version will be used. The version can + also be set to "latest" to force the use of the latest available version. + pip_url: connector pip URL - if not provided, the pip url will be inferred from the + connector name. + config: connector config - if not provided, you need to set it later via the set_config + method. + use_local_install: whether to use a virtual environment to run the connector. If True, the + connector is expected to be available on the path (e.g. installed via pip). If False, + the connector will be installed automatically in a virtual environment. + install_if_missing: whether to install the connector if it is not available locally. This + parameter is ignored if use_local_install is True. 
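+
+    Example (a hypothetical usage sketch: the `source-faker` connector, its
+    `count` option, and its `users` stream are illustrative assumptions, not
+    taken from this change):
+
+        import airbyte_lib as ab
+
+        source = ab.get_connector("source-faker", config={"count": 100})
+        for record in source.get_records("users"):
+            print(record)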
""" metadata = get_connector_metadata(name) if use_local_install: diff --git a/airbyte-lib/airbyte_lib/_file_writers/base.py b/airbyte-lib/airbyte_lib/_file_writers/base.py index a4913f0f7bb3..3f16953f12f5 100644 --- a/airbyte-lib/airbyte_lib/_file_writers/base.py +++ b/airbyte-lib/airbyte_lib/_file_writers/base.py @@ -53,8 +53,7 @@ def _write_batch( batch_id: str, record_batch: pa.Table | pa.RecordBatch, ) -> FileWriterBatchHandle: - """ - Process a record batch. + """Process a record batch. Return a list of paths to one or more cache files. """ diff --git a/airbyte-lib/airbyte_lib/_file_writers/parquet.py b/airbyte-lib/airbyte_lib/_file_writers/parquet.py index 201fb4952eef..aeb2113f2a28 100644 --- a/airbyte-lib/airbyte_lib/_file_writers/parquet.py +++ b/airbyte-lib/airbyte_lib/_file_writers/parquet.py @@ -1,6 +1,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. """A Parquet cache implementation.""" +from __future__ import annotations from pathlib import Path from typing import cast @@ -16,8 +17,7 @@ class ParquetWriterConfig(FileWriterConfigBase): """Configuration for the Snowflake cache.""" - # Inherits from base class: - # cache_dir: str | Path + # Inherits `cache_dir` from base class class ParquetWriter(FileWriterBase): @@ -44,11 +44,11 @@ def _write_batch( batch_id: str, record_batch: pa.Table | pa.RecordBatch, ) -> FileWriterBatchHandle: - """ - Process a record batch. + """Process a record batch. Return the path to the cache file. """ + _ = batch_id # unused output_file_path = self.get_new_cache_file_path(stream_name) with parquet.ParquetWriter(output_file_path, record_batch.schema) as writer: diff --git a/airbyte-lib/airbyte_lib/_processors.py b/airbyte-lib/airbyte_lib/_processors.py index f105bc248864..f0f94c30c512 100644 --- a/airbyte-lib/airbyte_lib/_processors.py +++ b/airbyte-lib/airbyte_lib/_processors.py @@ -104,8 +104,7 @@ def process_stdin( self, max_batch_size: int = DEFAULT_BATCH_SIZE, ) -> None: - """ - Process the input stream from stdin. + """Process the input stream from stdin. Return a list of summaries for testing. """ @@ -126,8 +125,7 @@ def process_input_stream( input_stream: io.TextIOBase, max_batch_size: int = DEFAULT_BATCH_SIZE, ) -> None: - """ - Parse the input stream and process data in batches. + """Parse the input stream and process data in batches. Return a list of summaries for testing. """ @@ -229,7 +227,7 @@ def _cleanup_batch( # noqa: B027 # Intentionally empty, not abstract For instance, file writers can override this method to delete the files created. Caches, similarly, can override this method to delete any other temporary artifacts. """ - pass # noqa: PIE790 # Intentional no-op + pass def _new_batch_id(self) -> str: """Return a new batch handle.""" diff --git a/airbyte-lib/airbyte_lib/_util/protocol_util.py b/airbyte-lib/airbyte_lib/_util/protocol_util.py index 56b28b2c628a..58ada9f5435b 100644 --- a/airbyte-lib/airbyte_lib/_util/protocol_util.py +++ b/airbyte-lib/airbyte_lib/_util/protocol_util.py @@ -1,9 +1,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
"""Internal utility functions, especially for dealing with Airbyte Protocol.""" +from __future__ import annotations -from collections.abc import Iterable, Iterator -from typing import Any, cast +from typing import TYPE_CHECKING, Any, cast from airbyte_protocol.models import ( AirbyteMessage, @@ -13,6 +13,10 @@ ) +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator + + def airbyte_messages_to_record_dicts( messages: Iterable[AirbyteMessage], ) -> Iterator[dict[str, Any]]: diff --git a/airbyte-lib/airbyte_lib/caches/base.py b/airbyte-lib/airbyte_lib/caches/base.py index bc92ea17a743..0718220a4a18 100644 --- a/airbyte-lib/airbyte_lib/caches/base.py +++ b/airbyte-lib/airbyte_lib/caches/base.py @@ -1,13 +1,13 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. """A SQL Cache implementation.""" +from __future__ import annotations import abc import enum from collections.abc import Generator, Iterator, Mapping from contextlib import contextmanager -from functools import cached_property, lru_cache -from pathlib import Path +from functools import cached_property from typing import TYPE_CHECKING, Any, cast, final import pandas as pd @@ -16,23 +16,24 @@ import ulid from overrides import overrides from sqlalchemy import CursorResult, Executable, TextClause, create_engine, text -from sqlalchemy.engine import Engine from sqlalchemy.pool import StaticPool -from airbyte_protocol.models import ConfiguredAirbyteStream - from airbyte_lib._file_writers.base import FileWriterBase, FileWriterBatchHandle from airbyte_lib._processors import BatchHandle, RecordProcessor from airbyte_lib.config import CacheConfigBase -from airbyte_lib.telemetry import CacheTelemetryInfo from airbyte_lib.types import SQLTypeConverter if TYPE_CHECKING: - from sqlalchemy.engine import Connection + from pathlib import Path + + from sqlalchemy.engine import Connection, Engine from sqlalchemy.engine.reflection import Inspector + from airbyte_protocol.models import ConfiguredAirbyteStream + from airbyte_lib.datasets._base import DatasetBase + from airbyte_lib.telemetry import CacheTelemetryInfo DEBUG_MODE = False # Set to True to enable additional debug logging. @@ -207,7 +208,7 @@ def get_sql_table( @property def streams( self, - ) -> dict[str, "DatasetBase"]: + ) -> dict[str, DatasetBase]: """Return a temporary table name.""" # TODO: Add support for streams map, based on the cached catalog. raise NotImplementedError("Streams map is not yet supported.") @@ -253,7 +254,7 @@ def _ensure_schema_exists( try: self._execute_sql(sql) - except Exception as ex: # noqa: BLE001 # Too-wide catch because we don't know what the DB will throw. + except Exception as ex: # Ignore schema exists errors. if "already exists" not in str(ex): raise @@ -279,7 +280,6 @@ def _fully_qualified( table_name: str, ) -> str: """Return the fully qualified name of the given table.""" - # return f"{self.database_name}.{self.config.schema_name}.{table_name}" return f"{self.config.schema_name}.{table_name}" @final @@ -325,10 +325,10 @@ def _get_schemas_list( def _ensure_final_table_exists( self, stream_name: str, + *, create_if_missing: bool = True, ) -> str: - """ - Create the final table if it doesn't already exist. + """Create the final table if it doesn't already exist. Return the table name. """ @@ -349,6 +349,7 @@ def _ensure_compatible_table_schema( self, stream_name: str, table_name: str, + *, raise_on_error: bool = False, ) -> bool: """Return true if the given table is compatible with the stream's schema. 
@@ -460,8 +461,7 @@ def _write_batch( batch_id: str, record_batch: pa.Table | pa.RecordBatch, ) -> FileWriterBatchHandle: - """ - Process a record batch. + """Process a record batch. Return the path to the cache file. """ @@ -559,6 +559,7 @@ def _execute_sql(self, sql: str | TextClause | Executable) -> CursorResult: def _drop_temp_table( self, table_name: str, + *, if_exists: bool = True, ) -> None: """Drop the given table.""" @@ -592,7 +593,7 @@ def _write_files_to_new_table( schema=self.config.schema_name, if_exists="append", index=False, - dtype=self._get_sql_column_definitions(stream_name), # type: ignore + dtype=self._get_sql_column_definitions(stream_name), # type: ignore[arg-type] ) return temp_table_name @@ -649,7 +650,6 @@ def _append_temp_table_to_final_table( """, ) - @lru_cache def _get_primary_keys( self, stream_name: str, diff --git a/airbyte-lib/airbyte_lib/caches/duckdb.py b/airbyte-lib/airbyte_lib/caches/duckdb.py index 067275658466..0d6ba6efe38a 100644 --- a/airbyte-lib/airbyte_lib/caches/duckdb.py +++ b/airbyte-lib/airbyte_lib/caches/duckdb.py @@ -115,6 +115,7 @@ def _ensure_compatible_table_schema( self, stream_name: str, table_name: str, + *, raise_on_error: bool = True, ) -> bool: """Return true if the given table is compatible with the stream's schema. @@ -122,7 +123,11 @@ def _ensure_compatible_table_schema( In addition to the base implementation, this also checks primary keys. """ # call super - if not super()._ensure_compatible_table_schema(stream_name, table_name, raise_on_error): + if not super()._ensure_compatible_table_schema( + stream_name=stream_name, + table_name=table_name, + raise_on_error=raise_on_error, + ): return False pk_cols = self._get_primary_keys(stream_name) diff --git a/airbyte-lib/airbyte_lib/caches/postgres.py b/airbyte-lib/airbyte_lib/caches/postgres.py index c833190e00ab..2ff36d2dbfea 100644 --- a/airbyte-lib/airbyte_lib/caches/postgres.py +++ b/airbyte-lib/airbyte_lib/caches/postgres.py @@ -23,8 +23,7 @@ class PostgresCacheConfig(SQLCacheConfigBase, ParquetWriterConfig): password: str database: str - # Already defined in base class: - # schema_name: str + # Already defined in base class: `schema_name` @overrides def get_sql_alchemy_url(self) -> str: diff --git a/airbyte-lib/airbyte_lib/datasets/_lazy.py b/airbyte-lib/airbyte_lib/datasets/_lazy.py index e2c22dd0a058..e08f844c4078 100644 --- a/airbyte-lib/airbyte_lib/datasets/_lazy.py +++ b/airbyte-lib/airbyte_lib/datasets/_lazy.py @@ -1,7 +1,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. +from __future__ import annotations -from collections.abc import Callable, Iterator -from typing import Any +from typing import TYPE_CHECKING, Any from overrides import overrides from typing_extensions import Self @@ -9,6 +9,10 @@ from airbyte_lib.datasets import DatasetBase +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + + class LazyDataset(DatasetBase): """A dataset that is loaded incrementally from a source or a SQL query. diff --git a/airbyte-lib/airbyte_lib/registry.py b/airbyte-lib/airbyte_lib/registry.py index baa4917959bb..e0afdbaf2c3a 100644 --- a/airbyte-lib/airbyte_lib/registry.py +++ b/airbyte-lib/airbyte_lib/registry.py @@ -1,8 +1,10 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+from __future__ import annotations import json import os from dataclasses import dataclass +from pathlib import Path import requests @@ -23,7 +25,7 @@ class ConnectorMetadata: def _update_cache() -> None: global _cache if os.environ.get("AIRBYTE_LOCAL_REGISTRY"): - with open(str(os.environ.get("AIRBYTE_LOCAL_REGISTRY"))) as f: + with Path(str(os.environ.get("AIRBYTE_LOCAL_REGISTRY"))).open() as f: data = json.load(f) else: response = requests.get( @@ -38,8 +40,9 @@ def _update_cache() -> None: def get_connector_metadata(name: str) -> ConnectorMetadata: - """ - check the cache for the connector. If the cache is empty, populate by calling update_cache + """Check the cache for the connector. + + If the cache is empty, populate by calling update_cache. """ if not _cache: _update_cache() diff --git a/airbyte-lib/airbyte_lib/source.py b/airbyte-lib/airbyte_lib/source.py index 0fcf6a535c35..415284994510 100644 --- a/airbyte-lib/airbyte_lib/source.py +++ b/airbyte-lib/airbyte_lib/source.py @@ -1,11 +1,10 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. +from __future__ import annotations import json import tempfile -from collections.abc import Generator, Iterable, Iterator -from contextlib import contextmanager -from functools import lru_cache -from typing import Any, Optional +from contextlib import contextmanager, suppress +from typing import TYPE_CHECKING, Any import jsonschema @@ -22,10 +21,8 @@ Type, ) -from airbyte_lib._executor import Executor from airbyte_lib._factories.cache_factories import get_default_cache from airbyte_lib._util import protocol_util # Internal utility functions -from airbyte_lib.caches import SQLCacheBase from airbyte_lib.results import ReadResult from airbyte_lib.telemetry import ( CacheTelemetryInfo, @@ -35,6 +32,13 @@ ) +if TYPE_CHECKING: + from collections.abc import Generator, Iterable, Iterator + + from airbyte_lib._executor import Executor + from airbyte_lib.caches import SQLCacheBase + + @contextmanager def as_temp_files(files: list[Any]) -> Generator[list[Any], Any, None]: temp_files: list[Any] = [] @@ -49,29 +53,29 @@ def as_temp_files(files: list[Any]) -> Generator[list[Any], Any, None]: yield [file.name for file in temp_files] finally: for temp_file in temp_files: - try: + with suppress(Exception): temp_file.close() - except Exception: - pass class Source: - """This class is representing a source that can be called""" + """A class representing a source that can be called.""" def __init__( self, executor: Executor, name: str, - config: Optional[dict[str, Any]] = None, - streams: Optional[list[str]] = None, - ): + config: dict[str, Any] | None = None, + streams: list[str] | None = None, + ) -> None: self._processed_records = 0 self.executor = executor self.name = name - self.streams: Optional[list[str]] = None + self.streams: list[str] | None = None self._processed_records = 0 - self._config_dict: Optional[dict[str, Any]] = None + self._config_dict: dict[str, Any] | None = None self._last_log_messages: list[str] = [] + self._discovered_catalog: AirbyteCatalog | None = None + self._spec: ConnectorSpecification | None = None if config is not None: self.set_config(config) if streams is not None: @@ -82,7 +86,8 @@ def set_streams(self, streams: list[str]) -> None: for stream in streams: if stream not in available_streams: raise Exception( - f"Stream {stream} is not available for connector {self.name}, choose from {available_streams}", + f"Stream {stream} is not available for connector {self.name}. 
" + f"Choose from: {available_streams}", ) self.streams = streams @@ -99,8 +104,7 @@ def _config(self) -> dict[str, Any]: return self._config_dict def _discover(self) -> AirbyteCatalog: - """ - Call discover on the connector. + """Call discover on the connector. This involves the following steps: * Write the config to a temporary file @@ -117,51 +121,46 @@ def _discover(self) -> AirbyteCatalog: ) def _validate_config(self, config: dict[str, Any]) -> None: - """ - Validate the config against the spec. - """ - spec = self._spec() + """Validate the config against the spec.""" + spec = self._get_spec(force_refresh=False) jsonschema.validate(config, spec.connectionSpecification) def get_available_streams(self) -> list[str]: - """ - Get the available streams from the spec. - """ + """Get the available streams from the spec.""" return [s.name for s in self._discover().streams] - @lru_cache(maxsize=1) - def _spec(self) -> ConnectorSpecification: - """ - Call spec on the connector. + def _get_spec(self, *, force_refresh: bool = False) -> ConnectorSpecification: + """Call spec on the connector. This involves the following steps: * execute the connector with spec * Listen to the messages and return the first AirbyteCatalog that comes along. * Make sure the subprocess is killed when the function returns. """ - for msg in self._execute(["spec"]): - if msg.type == Type.SPEC and msg.spec: - return msg.spec + if force_refresh or self._spec is None: + for msg in self._execute(["spec"]): + if msg.type == Type.SPEC and msg.spec: + self._spec = msg.spec + break + + if self._spec: + return self._spec + raise Exception( f"Connector did not return a spec. Last logs: {self._last_log_messages}", ) @property - @lru_cache(maxsize=1) def raw_catalog(self) -> AirbyteCatalog: - """ - Get the raw catalog for the given streams. - """ - catalog = self._discover() - return catalog + """Get the raw catalog for the given streams.""" + return self._discover() @property - @lru_cache(maxsize=1) def configured_catalog(self) -> ConfiguredAirbyteCatalog: - """ - Get the configured catalog for the given streams. - """ - catalog = self._discover() + """Get the configured catalog for the given streams.""" + if self._discovered_catalog is None: + self._discovered_catalog = self._discover() + return ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( @@ -170,14 +169,13 @@ def configured_catalog(self) -> ConfiguredAirbyteCatalog: destination_sync_mode=DestinationSyncMode.overwrite, primary_key=None, ) - for s in catalog.streams + for s in self._discovered_catalog.streams if self.streams is None or s.name in self.streams ], ) def get_records(self, stream: str) -> Iterator[dict[str, Any]]: - """ - Read a stream from the connector. + """Read a stream from the connector. This involves the following steps: * Call discover to get the catalog @@ -211,8 +209,7 @@ def get_records(self, stream: str) -> Iterator[dict[str, Any]]: yield from iterator # TODO: Refactor to use LazyDataset here def check(self) -> None: - """ - Call check on the connector. + """Call check on the connector. 
This involves the following steps: * Write the config to a temporary file @@ -223,25 +220,22 @@ def check(self) -> None: with as_temp_files([self._config]) as [config_file]: for msg in self._execute(["check", "--config", config_file]): if msg.type == Type.CONNECTION_STATUS and msg.connectionStatus: - if msg.connectionStatus.status == Status.FAILED: - raise Exception( - f"Connector returned failed status: {msg.connectionStatus.message}", - ) - else: - return + if msg.connectionStatus.status != Status.FAILED: + return # Success! + + raise Exception( + f"Connector returned failed status: {msg.connectionStatus.message}", + ) raise Exception( f"Connector did not return check status. Last logs: {self._last_log_messages}", ) def install(self) -> None: - """ - Install the connector if it is not yet installed. - """ + """Install the connector if it is not yet installed.""" self.executor.install() def uninstall(self) -> None: - """ - Uninstall the connector if it is installed. + """Uninstall the connector if it is installed. This only works if the use_local_install flag wasn't used and installation is managed by airbyte-lib. @@ -278,14 +272,14 @@ def _read_with_catalog( cache_info: CacheTelemetryInfo, catalog: ConfiguredAirbyteCatalog, ) -> Iterator[AirbyteMessage]: - """ - Call read on the connector. + """Call read on the connector. This involves the following steps: * Write the config to a temporary file * execute the connector with read --config --catalog * Listen to the messages and return the AirbyteRecordMessages that come along. - * Send out telemetry on the performed sync (with information about which source was used and the type of the cache) + * Send out telemetry on the performed sync (with information about which source was used and + the type of the cache) """ source_tracking_information = self.executor.get_telemetry_info() send_telemetry(source_tracking_information, cache_info, SyncState.STARTED) @@ -294,15 +288,14 @@ def _read_with_catalog( config_file, catalog_file, ]: - for msg in self._execute( + yield from self._execute( ["read", "--config", config_file, "--catalog", catalog_file], - ): - yield msg - except Exception as e: + ) + except Exception: send_telemetry( source_tracking_information, cache_info, SyncState.FAILED, self._processed_records ) - raise e + raise finally: send_telemetry( source_tracking_information, @@ -316,15 +309,14 @@ def _add_to_logs(self, message: str) -> None: self._last_log_messages = self._last_log_messages[-10:] def _execute(self, args: list[str]) -> Iterator[AirbyteMessage]: - """ - Execute the connector with the given arguments. + """Execute the connector with the given arguments. This involves the following steps: * Locate the right venv. It is called ".venv-" * Spawn a subprocess with .venv-/bin/ - * Read the output line by line of the subprocess and serialize them AirbyteMessage objects. Drop if not valid. + * Read the output line by line of the subprocess and serialize them AirbyteMessage objects. + Drop if not valid. """ - self.executor.ensure_installation() try: @@ -338,7 +330,7 @@ def _execute(self, args: list[str]) -> Iterator[AirbyteMessage]: except Exception: self._add_to_logs(line) except Exception as e: - raise Exception(f"{e!s}. Last logs: {self._last_log_messages}") + raise Exception(f"Execution failed. 
Last logs: {self._last_log_messages}") from e def _tally_records( self, diff --git a/airbyte-lib/airbyte_lib/telemetry.py b/airbyte-lib/airbyte_lib/telemetry.py index fe797c9d8261..5ab816208383 100644 --- a/airbyte-lib/airbyte_lib/telemetry.py +++ b/airbyte-lib/airbyte_lib/telemetry.py @@ -1,11 +1,12 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. +from __future__ import annotations import datetime import os from contextlib import suppress from dataclasses import asdict, dataclass from enum import Enum -from typing import Any, Optional +from typing import Any import requests @@ -13,7 +14,8 @@ # TODO: Use production tracking key -TRACKING_KEY = "jxT1qP9WEKwR3vtKMwP9qKhfQEGFtIM1" or str(os.environ.get("AIRBYTE_TRACKING_KEY")) +# TODO: This 'or' is a no-op. Intentional? Should we switch order to prefer env var if available? +TRACKING_KEY = "jxT1qP9WEKwR3vtKMwP9qKhfQEGFtIM1" or str(os.environ.get("AIRBYTE_TRACKING_KEY")) # noqa: SIM222 class SourceType(str, Enum): @@ -39,20 +41,20 @@ class SyncState(str, Enum): class SourceTelemetryInfo: name: str type: SourceType - version: Optional[str] + version: str | None def send_telemetry( source_info: SourceTelemetryInfo, cache_info: CacheTelemetryInfo, state: SyncState, - number_of_records: Optional[int] = None, + number_of_records: int | None = None, ) -> None: # If DO_NOT_TRACK is set, we don't send any telemetry if os.environ.get("DO_NOT_TRACK"): return - current_time = datetime.datetime.utcnow().isoformat() + current_time: str = datetime.datetime.utcnow().isoformat() # noqa: DTZ003 # prefer now() over utcnow() payload: dict[str, Any] = { "anonymousId": "airbyte-lib-user", "event": "sync", diff --git a/airbyte-lib/airbyte_lib/types.py b/airbyte-lib/airbyte_lib/types.py index eb87740a75cf..ca34a5801e0b 100644 --- a/airbyte-lib/airbyte_lib/types.py +++ b/airbyte-lib/airbyte_lib/types.py @@ -1,6 +1,7 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. """Type conversion methods for SQL Caches.""" +from __future__ import annotations from typing import cast diff --git a/airbyte-lib/airbyte_lib/validate.py b/airbyte-lib/airbyte_lib/validate.py index f8aa646d86fc..8eac20e1692b 100644 --- a/airbyte-lib/airbyte_lib/validate.py +++ b/airbyte-lib/airbyte_lib/validate.py @@ -1,5 +1,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. -"""Defines the `airbyte-lib-validate-source` CLI, which checks if connectors are compatible with airbyte-lib.""" +"""Defines the `airbyte-lib-validate-source` CLI. + +This tool checks if connectors are compatible with airbyte-lib. +""" import argparse import json @@ -40,7 +43,7 @@ def _run_subprocess_and_raise_on_failure(args: list[str]) -> None: def tests(connector_name: str, sample_config: str) -> None: print("Creating source and validating spec and version...") source = ab.get_connector( - # FIXME: noqa: SIM115, PTH123 + # TODO: FIXME: noqa: SIM115, PTH123 connector_name, config=json.load(open(sample_config)), # noqa: SIM115, PTH123 ) @@ -65,15 +68,16 @@ def tests(connector_name: str, sample_config: str) -> None: def run() -> None: - """ - This is a CLI entrypoint for the `airbyte-lib-validate-source` command. - It's called like this: airbyte-lib-validate-source —connector-dir . -—sample-config secrets/config.json + """Handle CLI entrypoint for the `airbyte-lib-validate-source` command. + + It's called like this: + > airbyte-lib-validate-source —connector-dir . 
-—sample-config secrets/config.json + It performs a basic smoke test to make sure the connector in question is airbyte-lib compliant: * Can be installed into a venv * Can be called via cli entrypoint - * Answers according to the Airbyte protocol when called with spec, check, discover and read + * Answers according to the Airbyte protocol when called with spec, check, discover and read. """ - # parse args args = _parse_args() connector_dir = args.connector_dir @@ -84,7 +88,7 @@ def run() -> None: def validate(connector_dir: str, sample_config: str) -> None: # read metadata.yaml metadata_path = Path(connector_dir) / "metadata.yaml" - with open(metadata_path) as stream: + with Path(metadata_path).open() as stream: metadata = yaml.safe_load(stream)["data"] # TODO: Use remoteRegistries.pypi.packageName once set for connectors @@ -96,7 +100,7 @@ def validate(connector_dir: str, sample_config: str) -> None: if not venv_path.exists(): _run_subprocess_and_raise_on_failure([sys.executable, "-m", "venv", venv_name]) - pip_path = os.path.join(venv_name, "bin", "pip") + pip_path = str(venv_path / "bin" / "pip") _run_subprocess_and_raise_on_failure([pip_path, "install", "-e", connector_dir]) diff --git a/airbyte-lib/docs.py b/airbyte-lib/docs.py index a80ad6e4a8a1..bfd30c05e554 100644 --- a/airbyte-lib/docs.py +++ b/airbyte-lib/docs.py @@ -7,20 +7,20 @@ import pdoc -def run(): - """ - Generate docs for all public modules in airbyte_lib and save them to docs/generated. +def run() -> None: + """Generate docs for all public modules in airbyte_lib and save them to docs/generated. + Public modules are: * The main airbyte_lib module - * All directory modules in airbyte_lib that don't start with an underscore + * All directory modules in airbyte_lib that don't start with an underscore. """ public_modules = ["airbyte_lib"] # recursively delete the docs/generated folder if it exists - if os.path.exists("docs/generated"): + if pathlib.Path("docs/generated").exists(): shutil.rmtree("docs/generated") - # determine all folders in airbyte_lib that don't start with an underscore and add them to public_modules + # All folders in `airbyte_lib` that don't start with "_" are treated as public modules. for d in os.listdir("airbyte_lib"): dir_path = pathlib.Path(f"airbyte_lib/{d}") if dir_path.is_dir() and not d.startswith("_"): diff --git a/airbyte-lib/docs/generated/airbyte_lib.html b/airbyte-lib/docs/generated/airbyte_lib.html index 15ce24f714ee..240492002ff8 100644 --- a/airbyte-lib/docs/generated/airbyte_lib.html +++ b/airbyte-lib/docs/generated/airbyte_lib.html @@ -4,7 +4,7 @@
 def
-    get_connector( name: str, version: str | None = None, pip_url: str | None = None, config: dict[str, typing.Any] | None = None, use_local_install: bool = False, install_if_missing: bool = True) -> Source:
+    get_connector( name: str, version: str | None = None, pip_url: str | None = None, config: dict[str, typing.Any] | None = None, *, use_local_install: bool = False, install_if_missing: bool = True) -> Source:
@@ -12,16 +12,20 @@
 Get a connector by name and version.
-Parameters
-  • name: connector name
-  • version: connector version - if not provided, the currently installed version will be used. If no version is installed, the latest available version will be used. The version can also be set to "latest" to force the use of the latest available version.
-  • pip_url: connector pip URL - if not provided, the pip url will be inferred from the connector name.
-  • config: connector config - if not provided, you need to set it later via the set_config method.
-  • use_local_install: whether to use a virtual environment to run the connector. If True, the connector is expected to be available on the path (e.g. installed via pip). If False, the connector will be installed automatically in a virtual environment.
-  • install_if_missing: whether to install the connector if it is not available locally. This parameter is ignored if use_local_install is True.
+Args:
+    name: connector name
+    version: connector version - if not provided, the currently installed version will be used.
+        If no version is installed, the latest available version will be used. The version can
+        also be set to "latest" to force the use of the latest available version.
+    pip_url: connector pip URL - if not provided, the pip url will be inferred from the
+        connector name.
+    config: connector config - if not provided, you need to set it later via the set_config
+        method.
+    use_local_install: whether to use a virtual environment to run the connector. If True, the
+        connector is expected to be available on the path (e.g. installed via pip). If False,
+        the connector will be installed automatically in a virtual environment.
+    install_if_missing: whether to install the connector if it is not available locally. This
+        parameter is ignored if use_local_install is True.
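The diff above makes get_connector's boolean flags keyword-only (note the new `*` in the signature). A minimal usage sketch of the new call shape; the connector name and config are illustrative placeholders, not taken from this patch:

```python
import airbyte_lib as ab

# With the `*` in the signature, the boolean flags can no longer be passed
# positionally; passing them that way now raises a TypeError.
source = ab.get_connector(
    "source-faker",           # illustrative connector name
    config={"count": 100},    # illustrative config
    install_if_missing=True,  # keyword-only after this change
)
```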

@@ -48,7 +52,7 @@
 Parameters
 def
-    new_local_cache( cache_name: str | None = None, cache_dir: str | pathlib.Path | None = None, cleanup: bool = True) -> airbyte_lib.caches.duckdb.DuckDBCache:
+    new_local_cache( cache_name: str | None = None, cache_dir: str | pathlib.Path | None = None, *, cleanup: bool = True) -> airbyte_lib.caches.duckdb.DuckDBCache:
@@ -58,7 +62,7 @@
 Args:
     cache_name: Name to use for the cache. Defaults to None.
-    root_dir: Root directory to store the cache in. Defaults to None.
+    cache_dir: Root directory to store the cache in. Defaults to None.
     cleanup: Whether to clean up temporary files. Defaults to True.
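The same keyword-only treatment applies to new_local_cache, and the docstring now names the directory parameter `cache_dir` to match the actual signature. A short sketch under the same assumed module-level import; the cache name is illustrative:

```python
import airbyte_lib as ab

# `cleanup` is keyword-only after this change; `cache_dir` falls back to a
# default location when omitted, per the docstring above.
cache = ab.new_local_cache(cache_name="my_cache", cleanup=True)
```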

 Cache files are stored in the .cache directory, relative to the current
 working directory.
@@ -192,14 +196,14 @@
 Parameters
-This class is representing a source that can be called
+A class representing a source that can be called.
-    Source( executor: airbyte_lib._executor.Executor, name: str, config: Optional[dict[str, Any]] = None, streams: Optional[list[str]] = None)
+    Source( executor: airbyte_lib._executor.Executor, name: str, config: dict[str, typing.Any] | None = None, streams: list[str] | None = None)
@@ -232,7 +236,7 @@
 Parameters
-    streams: Optional[list[str]]
+    streams: list[str] | None
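Together with the source.py changes earlier in this patch, the documented Source API can be exercised end to end. A minimal sketch, reusing the illustrative connector from the example above:

```python
import airbyte_lib as ab

source = ab.get_connector("source-faker", config={"count": 100})  # illustrative
source.check()                 # raises if the connector reports a failed status
source.set_streams(["users"])  # raises if a stream is not in the discovered catalog
for record in source.get_records("users"):
    print(record)              # records are yielded as plain dicts
```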
diff --git a/airbyte-lib/docs/generated/airbyte_lib/factories.html b/airbyte-lib/docs/generated/airbyte_lib/factories.html
deleted file mode 100644
index c0d27ca14eaa..000000000000
--- a/airbyte-lib/docs/generated/airbyte_lib/factories.html
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
- - - - \ No newline at end of file diff --git a/airbyte-lib/poetry.lock b/airbyte-lib/poetry.lock index 58fb0cbf88c7..a201b487b774 100644 --- a/airbyte-lib/poetry.lock +++ b/airbyte-lib/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "airbyte-cdk" -version = "0.58.3" +version = "0.58.7" description = "A framework for writing Airbyte Connectors." optional = false python-versions = ">=3.8" files = [ - {file = "airbyte-cdk-0.58.3.tar.gz", hash = "sha256:da75898d1503d8bd06840cb0c10a06f6a0ebcc77858deca146de34e392b01ede"}, - {file = "airbyte_cdk-0.58.3-py3-none-any.whl", hash = "sha256:00f81ebe7d8c7be724ea2c8f364f31803de2345d1ccbf9cdcad808562e512b7b"}, + {file = "airbyte-cdk-0.58.7.tar.gz", hash = "sha256:00e379e2379b38683992027114a2190f49befec8cbac67d0a2c907786111e77b"}, + {file = "airbyte_cdk-0.58.7-py3-none-any.whl", hash = "sha256:09b31d32899cc6dc91e39716e8d1601503a7884d837752e683d1e3ef7dfe73be"}, ] [package.dependencies] @@ -524,13 +524,13 @@ six = "*" [[package]] name = "jinja2" -version = "3.1.2" +version = "3.1.3" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -1386,7 +1386,147 @@ mypy = [ {version = ">=0.900", markers = "python_version >= \"3.11\""}, {version = ">=0.780", markers = "python_version >= \"3.9\" and python_version < \"3.11\""}, ] -pytest = {version = ">=6.2", markers = "python_version >= \"3.10\""} +pytest = [ + {version = ">=6.2", markers = "python_version >= \"3.10\""}, + {version = ">=4.6", markers = "python_version >= \"3.6\" and python_version < \"3.10\""}, +] + +[[package]] +name = "pytest-postgresql" +version = "5.0.0" +description = "Postgresql fixtures and fixture factories for Pytest." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-postgresql-5.0.0.tar.gz", hash = "sha256:22edcbafab8995ee85b8d948ddfaad4f70c2c7462303d7477ecd2f77fc9d15bd"}, + {file = "pytest_postgresql-5.0.0-py3-none-any.whl", hash = "sha256:6e8f0773b57c9b8975b6392c241b7b81b7018f32079a533f368f2fbda732ecd3"}, +] + +[package.dependencies] +mirakuru = "*" +port-for = ">=0.6.0" +psycopg = ">=3.0.0" +pytest = ">=6.2" +setuptools = "*" + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-ulid" +version = "2.2.0" +description = "Universally unique lexicographically sortable identifier" +optional = false +python-versions = ">=3.9" +files = [ + {file = "python_ulid-2.2.0-py3-none-any.whl", hash = "sha256:ec2e69292c0b7c338a07df5e15b05270be6823675c103383e74d1d531945eab5"}, + {file = "python_ulid-2.2.0.tar.gz", hash = "sha256:9ec777177d396880d94be49ac7eb4ae2cd4a7474448bfdbfe911537add970aeb"}, +] + +[[package]] +name = "pytz" +version = "2023.3.post1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, +] + +[[package]] +name = "pytzdata" +version = "2020.1" +description = "The Olson timezone database for Python." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, + {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, +] + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file = "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] [[package]] name = "pytest-postgresql" @@ -1701,28 +1841,28 @@ files = [ [[package]] name = "ruff" -version = "0.1.11" +version = "0.1.12" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:a7f772696b4cdc0a3b2e527fc3c7ccc41cdcb98f5c80fdd4f2b8c50eb1458196"}, - {file = "ruff-0.1.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:934832f6ed9b34a7d5feea58972635c2039c7a3b434fe5ba2ce015064cb6e955"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea0d3e950e394c4b332bcdd112aa566010a9f9c95814844a7468325290aabfd9"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9bd4025b9c5b429a48280785a2b71d479798a69f5c2919e7d274c5f4b32c3607"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1ad00662305dcb1e987f5ec214d31f7d6a062cae3e74c1cbccef15afd96611d"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4b077ce83f47dd6bea1991af08b140e8b8339f0ba8cb9b7a484c30ebab18a23f"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4a88efecec23c37b11076fe676e15c6cdb1271a38f2b415e381e87fe4517f18"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b25093dad3b055667730a9b491129c42d45e11cdb7043b702e97125bcec48a1"}, - {file = "ruff-0.1.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:231d8fb11b2cc7c0366a326a66dafc6ad449d7fcdbc268497ee47e1334f66f77"}, - {file = "ruff-0.1.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:09c415716884950080921dd6237767e52e227e397e2008e2bed410117679975b"}, - {file = "ruff-0.1.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:0f58948c6d212a6b8d41cd59e349751018797ce1727f961c2fa755ad6208ba45"}, - {file = 
"ruff-0.1.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:190a566c8f766c37074d99640cd9ca3da11d8deae2deae7c9505e68a4a30f740"}, - {file = "ruff-0.1.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6464289bd67b2344d2a5d9158d5eb81025258f169e69a46b741b396ffb0cda95"}, - {file = "ruff-0.1.11-py3-none-win32.whl", hash = "sha256:9b8f397902f92bc2e70fb6bebfa2139008dc72ae5177e66c383fa5426cb0bf2c"}, - {file = "ruff-0.1.11-py3-none-win_amd64.whl", hash = "sha256:eb85ee287b11f901037a6683b2374bb0ec82928c5cbc984f575d0437979c521a"}, - {file = "ruff-0.1.11-py3-none-win_arm64.whl", hash = "sha256:97ce4d752f964ba559c7023a86e5f8e97f026d511e48013987623915431c7ea9"}, - {file = "ruff-0.1.11.tar.gz", hash = "sha256:f9d4d88cb6eeb4dfe20f9f0519bd2eaba8119bde87c3d5065c541dbae2b5a2cb"}, + {file = "ruff-0.1.12-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:544038693543c11edc56bb94a9875df2dc249e3616f90c15964c720dcccf0745"}, + {file = "ruff-0.1.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:8a0e3ef6299c4eab75a7740730e4b4bd4a36e0bd8102ded01553403cad088fd4"}, + {file = "ruff-0.1.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47f6d939461e3273f10f4cd059fd0b83c249d73f1736032fffbac83a62939395"}, + {file = "ruff-0.1.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25be18abc1fc3f3d3fb55855c41ed5d52063316defde202f413493bb3888218c"}, + {file = "ruff-0.1.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d41e9f100b50526d80b076fc9c103c729387ff3f10f63606ed1038c30a372a40"}, + {file = "ruff-0.1.12-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:472a0548738d4711549c7874b43fab61aacafb1fede29c5232d4cfb8e2d13f69"}, + {file = "ruff-0.1.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46685ef2f106b827705df876d38617741ed4f858bbdbc0817f94476c45ab6669"}, + {file = "ruff-0.1.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf6073749c70b616d7929897b14824ec6713a6c3a8195dfd2ffdcc66594d880c"}, + {file = "ruff-0.1.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4bdf26e5a2efab4c3aaf6b61648ea47a525dc12775810a85c285dc9ca03e5ac0"}, + {file = "ruff-0.1.12-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b631c6a95e4b6d5c4299e599067b5a89f5b18e2f2d9a6c22b879b3c4b077c96e"}, + {file = "ruff-0.1.12-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:f193f460e231e63af5fc7516897cf5ab257cbda72ae83cf9a654f1c80c3b758a"}, + {file = "ruff-0.1.12-py3-none-musllinux_1_2_i686.whl", hash = "sha256:718523c3a0b787590511f212d30cc9b194228ef369c8bdd72acd1282cc27c468"}, + {file = "ruff-0.1.12-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1c49e826de55d81a6ef93808b760925e492bad7cc470aaa114a3be158b2c7f99"}, + {file = "ruff-0.1.12-py3-none-win32.whl", hash = "sha256:fbb1c002eeacb60161e51d77b2274c968656599477a1c8c65066953276e8ee2b"}, + {file = "ruff-0.1.12-py3-none-win_amd64.whl", hash = "sha256:7fe06ba77e5b7b78db1d058478c47176810f69bb5be7c1b0d06876af59198203"}, + {file = "ruff-0.1.12-py3-none-win_arm64.whl", hash = "sha256:bb29f8e3e6c95024902eaec5a9ce1fd5ac4e77f4594f4554e67fbb0f6d9a2f37"}, + {file = "ruff-0.1.12.tar.gz", hash = "sha256:97189f38c655e573f6bea0d12e9f18aad5539fd08ab50651449450999f45383a"}, ] [[package]] @@ -2045,5 +2185,5 @@ files = [ [metadata] lock-version = "2.0" -python-versions = "^3.10" -content-hash = "a349c7b28e4ed7cb240906e522e373f6b00a5f2ee4ed4138567a0ed094491a30" +python-versions = "^3.9" +content-hash = 
"65721067d8262d014d4b36deced214a27a8c2bdef7e5cf25eed709619cfa3716" diff --git a/airbyte-lib/pyproject.toml b/airbyte-lib/pyproject.toml index 84f492f0bba3..ac1042b33d72 100644 --- a/airbyte-lib/pyproject.toml +++ b/airbyte-lib/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" packages = [{include = "airbyte_lib"}] [tool.poetry.dependencies] -python = "^3.10" +python = "^3.9" airbyte-cdk = "^0.58.3" # airbyte-protocol-models = "^1.0.1" # Conflicts with airbyte-cdk # TODO: delete or resolve @@ -50,28 +50,97 @@ build-backend = "poetry.core.masonry.api" max-args = 8 # Relaxed from default of 5 [tool.ruff] -target-version = "py310" -select = ["F", "E"] -extend-select = [ - "W", "C90", "I", "N", "UP", "YTT", "ANN", "ASYNC", "BLE", "B", "A", "COM", "C4", "EXE", "FA", "ISC", "ICN", "INP", "PIE", "PYI", "PT", "Q", "RSE", "RET", "SLF", "SLOT", "SIM", "TID", "TCH", "INT", "ARG", "PTH", "TD", "FIX", "PD", "PL", "TRY", "FLY", "NPY", "PERF", "RUF" +target-version = "py39" +select = [ + # For rules reference, see https://docs.astral.sh/ruff/rules/ + "A", # flake8-builtins + "ANN", # flake8-annotations + "ARG", # flake8-unused-arguments + "ASYNC", # flake8-async + "B", # flake8-bugbear + "FBT", # flake8-boolean-trap + "BLE", # Blind except + "C4", # flake8-comprehensions + "C90", # mccabe (complexity) + "COM", # flake8-commas + "CPY", # missing copyright notice + # "D", # pydocstyle # TODO: Re-enable when adding docstrings + "DTZ", # flake8-datetimez + "E", # pycodestyle (errors) + "ERA", # flake8-eradicate (commented out code) + "EXE", # flake8-executable + "F", # Pyflakes + "FA", # flake8-future-annotations + "FIX", # flake8-fixme + "FLY", # flynt + "FURB", # Refurb + "I", # isort + "ICN", # flake8-import-conventions + "INP", # flake8-no-pep420 + "INT", # flake8-gettext + "ISC", # flake8-implicit-str-concat + "ICN", # flake8-import-conventions + "LOG", # flake8-logging + "N", # pep8-naming + "PD", # pandas-vet + "PERF", # Perflint + "PIE", # flake8-pie + "PGH", # pygrep-hooks + "PL", # Pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "PYI", # flake8-pyi + "Q", # flake8-quotes + "RET", # flake8-return + "RSE", # flake8-raise + "RUF", # Ruff-specific rules + "SIM", # flake8-simplify + "SLF", # flake8-self + "SLOT", # flake8-slots + "T10", # debugger calls + # "T20", # flake8-print # TODO: Re-enable once we have logging + "TCH", # flake8-type-checking + "TD", # flake8-todos + "TID", # flake8-tidy-imports + "TRY", # tryceratops + "UP", # pyupgrade + "W", # pycodestyle (warnings) + "YTT" # flake8-2020 ] ignore = [ # For rules reference, see https://docs.astral.sh/ruff/rules/ - # "I001", # Sorted imports - # "ANN401", # Any-typed expressions + + # These we don't agree with or don't want to prioritize to enforce: "ANN003", # kwargs missing type annotations - # "SIM300", # 'yoda' conditions - "PERF203", # exception handling in loop "ANN101", # Type annotations for 'self' args - "TD002", # Require author for TODOs - "TD003", # Require links for TODOs - "B019", # lru_cache on class methods keep instance from getting garbage collected - "COM812", # Conflicts with ruff auto-format + "COM812", # Because it conflicts with ruff auto-format + "EM", # flake8-errmsgs (may reconsider later) + "DJ", # Django linting + "G", # flake8-logging-format "ISC001", # Conflicts with ruff auto-format - "TRY003" # Raising exceptions with too-long string descriptions # TODO: re-evaluate once we have our own exception classes + "NPY", # NumPy-specific rules + "PIE790", # Allow unnecssary 'pass' (sometimes useful 
for readability) + "PERF203", # exception handling in loop + "S", # flake8-bandit (noisy, security related) + "TD002", # Require author for TODOs + "TRIO", # flake8-trio (opinionated, noisy) + "TRY003", # Exceptions with too-long string descriptions # TODO: re-evaluate once we have our own exception classes + "INP001", # Dir 'examples' is part of an implicit namespace package. Add an __init__.py. + + # TODO: Consider re-enabling these before release: + "A003", # Class attribute 'type' is shadowing a Python builtin + "BLE001", # Do not catch blind exception: Exception + "ERA001", # Remove commented-out code + "FIX002", # Allow "TODO:" until release (then switch to requiring links via TDO003) + "PLW0603", # Using the global statement to update _cache is discouraged + "TD003", # Require links for TODOs # TODO: Re-enable when we disable FIX002 + "TRY002", # TODO: When we have time to tackle exception management ("Create your own exception") ] fixable = ["ALL"] -unfixable = [] +unfixable = [ + "ERA001", # Commented-out code (avoid silent loss of code) + "T201" # print() calls (avoid silent loss of code / log messages) +] line-length = 100 extend-exclude = ["docs", "test", "tests"] dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" @@ -97,7 +166,7 @@ max-complexity = 24 ignore-overlong-task-comments = true [tool.ruff.pydocstyle] -convention = "numpy" +convention = "google" [tool.ruff.flake8-annotations] allow-star-arg-any = false @@ -113,7 +182,7 @@ docstring-code-format = true [tool.mypy] # Platform configuration -python_version = "3.10" +python_version = "3.9" # imports related ignore_missing_imports = true follow_imports = "silent" diff --git a/airbyte-lib/tests/lint_tests/test_ruff.py b/airbyte-lib/tests/lint_tests/test_ruff.py index 5f654d7b11e4..57262a8f608c 100644 --- a/airbyte-lib/tests/lint_tests/test_ruff.py +++ b/airbyte-lib/tests/lint_tests/test_ruff.py @@ -4,16 +4,7 @@ import pytest -XFAIL = True # Toggle to set if the test is expected to fail or not - -@pytest.mark.xfail( - condition=XFAIL, - reason=( - "This is expected to fail until Ruff cleanup is completed.\n" - "In the meanwhile, use `poetry run ruff check --fix .` to find and fix issues." 
- ), -) def test_ruff_linting(): # Run the check command check_result = subprocess.run( From be09dfe919c5dde188b38e5ab87e02fbf55fe893 Mon Sep 17 00:00:00 2001 From: Raghav Gupta <43565099+Marcus0086@users.noreply.github.com> Date: Wed, 17 Jan 2024 16:41:25 +0530 Subject: [PATCH 05/12] =?UTF-8?q?=F0=9F=90=9B=20Destination=20Weaviate:=20?= =?UTF-8?q?Multi=20Tenancy=20Support=20(#34229)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Joe Reuter --- .../destination_weaviate/config.py | 1 + .../destination_weaviate/indexer.py | 75 +++- .../integration_tests/spec.json | 385 ++++++++++-------- .../destination-weaviate/metadata.yaml | 2 +- .../unit_tests/indexer_test.py | 77 +++- docs/integrations/destinations/weaviate.md | 3 + 6 files changed, 328 insertions(+), 215 deletions(-) diff --git a/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/config.py b/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/config.py index 6c580102e7c3..c4708d59ffc9 100644 --- a/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/config.py +++ b/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/config.py @@ -65,6 +65,7 @@ class WeaviateIndexingConfigModel(BaseModel): ) batch_size: int = Field(title="Batch Size", description="The number of records to send to Weaviate in each batch", default=128) text_field: str = Field(title="Text Field", description="The field in the object that contains the embedded text", default="text") + tenant_id: str = Field(title="Tenant ID", description="The tenant ID to use for multi tenancy", airbyte_secret=True, default="") default_vectorizer: str = Field( title="Default Vectorizer", description="The vectorizer to use if new classes need to be created", diff --git a/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/indexer.py b/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/indexer.py index 45c54d54bfed..93adb9d825a4 100644 --- a/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/indexer.py +++ b/airbyte-integrations/connectors/destination-weaviate/destination_weaviate/indexer.py @@ -52,6 +52,14 @@ def _create_client(self): batch_size=None, dynamic=False, weaviate_error_retries=weaviate.WeaviateErrorRetryConf(number_retries=5) ) + def _add_tenant_to_class_if_missing(self, class_name: str): + class_tenants = self.client.schema.get_class_tenants(class_name=class_name) + if class_tenants is not None and self.config.tenant_id not in [tenant.name for tenant in class_tenants]: + self.client.schema.add_class_tenants(class_name=class_name, tenants=[weaviate.Tenant(name=self.config.tenant_id)]) + logging.info(f"Added tenant {self.config.tenant_id} to class {class_name}") + else: + logging.info(f"Tenant {self.config.tenant_id} already exists in class {class_name}") + def check(self) -> Optional[str]: deployment_mode = os.environ.get("DEPLOYMENT_MODE", "") if deployment_mode.casefold() == CLOUD_DEPLOYMENT_MODE and not self._uses_safe_config(): @@ -69,6 +77,11 @@ def pre_sync(self, catalog: ConfiguredAirbyteCatalog) -> None: self._create_client() classes = {c["class"]: c for c in self.client.schema.get().get("classes", [])} self.has_record_id_metadata = defaultdict(lambda: False) + + if self.config.tenant_id.strip(): + for class_name in classes.keys(): + self._add_tenant_to_class_if_missing(class_name) + for stream in catalog.streams: class_name = 
self._stream_to_class_name(stream.stream.name) schema = classes[class_name] if class_name in classes else None @@ -78,24 +91,29 @@ def pre_sync(self, catalog: ConfiguredAirbyteCatalog) -> None: self.client.schema.create_class(schema) logging.info(f"Recreated class {class_name}") elif class_name not in classes: - self.client.schema.create_class( - { - "class": class_name, - "vectorizer": self.config.default_vectorizer, - "properties": [ - { - # Record ID is used for bookkeeping, not for searching - "name": METADATA_RECORD_ID_FIELD, - "dataType": ["text"], - "description": "Record ID, used for bookkeeping.", - "indexFilterable": True, - "indexSearchable": False, - "tokenization": "field", - } - ], - } - ) + config = { + "class": class_name, + "vectorizer": self.config.default_vectorizer, + "properties": [ + { + # Record ID is used for bookkeeping, not for searching + "name": METADATA_RECORD_ID_FIELD, + "dataType": ["text"], + "description": "Record ID, used for bookkeeping.", + "indexFilterable": True, + "indexSearchable": False, + "tokenization": "field", + } + ], + } + if self.config.tenant_id.strip(): + config["multiTenancyConfig"] = {"enabled": True} + + self.client.schema.create_class(config) logging.info(f"Created class {class_name}") + + if self.config.tenant_id.strip(): + self._add_tenant_to_class_if_missing(class_name) else: self.has_record_id_metadata[class_name] = schema is not None and any( prop.get("name") == METADATA_RECORD_ID_FIELD for prop in schema.get("properties", {}) @@ -105,10 +123,18 @@ def delete(self, delete_ids, namespace, stream): if len(delete_ids) > 0: class_name = self._stream_to_class_name(stream) if self.has_record_id_metadata[class_name]: - self.client.batch.delete_objects( - class_name=class_name, - where={"path": [METADATA_RECORD_ID_FIELD], "operator": "ContainsAny", "valueStringArray": delete_ids}, - ) + where_filter = {"path": [METADATA_RECORD_ID_FIELD], "operator": "ContainsAny", "valueStringArray": delete_ids} + if self.config.tenant_id.strip(): + self.client.batch.delete_objects( + class_name=class_name, + tenant=self.config.tenant_id, + where=where_filter, + ) + else: + self.client.batch.delete_objects( + class_name=class_name, + where=where_filter, + ) def index(self, document_chunks, namespace, stream): if len(document_chunks) == 0: @@ -124,7 +150,12 @@ def index(self, document_chunks, namespace, stream): weaviate_object[self.config.text_field] = chunk.page_content object_id = str(uuid.uuid4()) class_name = self._stream_to_class_name(chunk.record.stream) - self.client.batch.add_data_object(weaviate_object, class_name, object_id, vector=chunk.embedding) + if self.config.tenant_id.strip(): + self.client.batch.add_data_object( + weaviate_object, class_name, object_id, vector=chunk.embedding, tenant=self.config.tenant_id + ) + else: + self.client.batch.add_data_object(weaviate_object, class_name, object_id, vector=chunk.embedding) self._flush() def _stream_to_class_name(self, stream_name: str) -> str: diff --git a/airbyte-integrations/connectors/destination-weaviate/integration_tests/spec.json b/airbyte-integrations/connectors/destination-weaviate/integration_tests/spec.json index 3923a8851c4d..a5db30c7213d 100644 --- a/airbyte-integrations/connectors/destination-weaviate/integration_tests/spec.json +++ b/airbyte-integrations/connectors/destination-weaviate/integration_tests/spec.json @@ -5,164 +5,6 @@ "description": "The configuration model for the Vector DB based destinations. 
This model is used to generate the UI for the destination configuration,\nas well as to provide type safety for the configuration passed to the destination.\n\nThe configuration model is composed of four parts:\n* Processing configuration\n* Embedding configuration\n* Indexing configuration\n* Advanced configuration\n\nProcessing, embedding and advanced configuration are provided by this base class, while the indexing configuration is provided by the destination connector in the sub class.", "type": "object", "properties": { - "processing": { - "title": "ProcessingConfigModel", - "type": "object", - "properties": { - "chunk_size": { - "title": "Chunk size", - "description": "Size of chunks in tokens to store in vector store (make sure it is not too big for the context if your LLM)", - "minimum": 1, - "maximum": 8191, - "type": "integer" - }, - "chunk_overlap": { - "title": "Chunk overlap", - "description": "Size of overlap between chunks in tokens to store in vector store to better capture relevant context", - "default": 0, - "type": "integer" - }, - "text_fields": { - "title": "Text fields to embed", - "description": "List of fields in the record that should be used to calculate the embedding. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered text fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array.", - "default": [], - "always_show": true, - "examples": ["text", "user.name", "users.*.name"], - "type": "array", - "items": { "type": "string" } - }, - "metadata_fields": { - "title": "Fields to store as metadata", - "description": "List of fields in the record that should be stored as metadata. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered metadata fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. When specifying nested paths, all matching values are flattened into an array set to a field named by the path.", - "default": [], - "always_show": true, - "examples": ["age", "user", "user.name"], - "type": "array", - "items": { "type": "string" } - }, - "field_name_mappings": { - "title": "Field name mappings", - "description": "List of fields to rename. 
Not applicable for nested fields, but can be used to rename fields already flattened via dot notation.", - "default": [], - "type": "array", - "items": { - "title": "FieldNameMappingConfigModel", - "type": "object", - "properties": { - "from_field": { - "title": "From field name", - "description": "The field name in the source", - "type": "string" - }, - "to_field": { - "title": "To field name", - "description": "The field name to use in the destination", - "type": "string" - } - }, - "required": ["from_field", "to_field"] - } - }, - "text_splitter": { - "title": "Text splitter", - "description": "Split text fields into chunks based on the specified method.", - "type": "object", - "oneOf": [ - { - "title": "By Separator", - "type": "object", - "properties": { - "mode": { - "title": "Mode", - "default": "separator", - "const": "separator", - "enum": ["separator"], - "type": "string" - }, - "separators": { - "title": "Separators", - "description": "List of separator strings to split text fields by. The separator itself needs to be wrapped in double quotes, e.g. to split by the dot character, use \".\". To split by a newline, use \"\\n\".", - "default": ["\"\\n\\n\"", "\"\\n\"", "\" \"", "\"\""], - "type": "array", - "items": { "type": "string" } - }, - "keep_separator": { - "title": "Keep separator", - "description": "Whether to keep the separator in the resulting chunks", - "default": false, - "type": "boolean" - } - }, - "required": ["mode"], - "description": "Split the text by the list of separators until the chunk size is reached, using the earlier mentioned separators where possible. This is useful for splitting text fields by paragraphs, sentences, words, etc." - }, - { - "title": "By Markdown header", - "type": "object", - "properties": { - "mode": { - "title": "Mode", - "default": "markdown", - "const": "markdown", - "enum": ["markdown"], - "type": "string" - }, - "split_level": { - "title": "Split level", - "description": "Level of markdown headers to split text fields by. Headings down to the specified level will be used as split points", - "default": 1, - "minimum": 1, - "maximum": 6, - "type": "integer" - } - }, - "required": ["mode"], - "description": "Split the text by Markdown headers down to the specified header level. If the chunk size fits multiple sections, they will be combined into a single chunk." - }, - { - "title": "By Programming Language", - "type": "object", - "properties": { - "mode": { - "title": "Mode", - "default": "code", - "const": "code", - "enum": ["code"], - "type": "string" - }, - "language": { - "title": "Language", - "description": "Split code in suitable places based on the programming language", - "enum": [ - "cpp", - "go", - "java", - "js", - "php", - "proto", - "python", - "rst", - "ruby", - "rust", - "scala", - "swift", - "markdown", - "latex", - "html", - "sol" - ], - "type": "string" - } - }, - "required": ["language", "mode"], - "description": "Split the text by suitable delimiters based on the programming language. This is useful for splitting code into chunks." - } - ] - } - }, - "required": ["chunk_size"], - "group": "processing" - }, "embedding": { "title": "Embedding", "description": "Embedding configuration", @@ -181,8 +23,8 @@ "type": "string" } }, - "required": ["mode"], - "description": "Do not calculate and pass embeddings to Weaviate. Suitable for clusters with configured vectorizers to calculate embeddings within Weaviate or for classes that should only support regular text search." 
+ "description": "Do not calculate and pass embeddings to Weaviate. Suitable for clusters with configured vectorizers to calculate embeddings within Weaviate or for classes that should only support regular text search.", + "required": ["mode"] }, { "title": "Azure OpenAI", @@ -296,8 +138,8 @@ "type": "string" } }, - "required": ["mode"], - "description": "Use a fake embedding made out of random vectors with 1536 embedding dimensions. This is useful for testing the data pipeline without incurring any costs." + "description": "Use a fake embedding made out of random vectors with 1536 embedding dimensions. This is useful for testing the data pipeline without incurring any costs.", + "required": ["mode"] }, { "title": "OpenAI-compatible", @@ -341,6 +183,177 @@ } ] }, + "processing": { + "title": "ProcessingConfigModel", + "type": "object", + "properties": { + "chunk_size": { + "title": "Chunk size", + "description": "Size of chunks in tokens to store in vector store (make sure it is not too big for the context if your LLM)", + "maximum": 8191, + "minimum": 1, + "type": "integer" + }, + "chunk_overlap": { + "title": "Chunk overlap", + "description": "Size of overlap between chunks in tokens to store in vector store to better capture relevant context", + "default": 0, + "type": "integer" + }, + "text_fields": { + "title": "Text fields to embed", + "description": "List of fields in the record that should be used to calculate the embedding. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered text fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array.", + "default": [], + "always_show": true, + "examples": ["text", "user.name", "users.*.name"], + "type": "array", + "items": { + "type": "string" + } + }, + "metadata_fields": { + "title": "Fields to store as metadata", + "description": "List of fields in the record that should be stored as metadata. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered metadata fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. When specifying nested paths, all matching values are flattened into an array set to a field named by the path.", + "default": [], + "always_show": true, + "examples": ["age", "user", "user.name"], + "type": "array", + "items": { + "type": "string" + } + }, + "text_splitter": { + "title": "Text splitter", + "description": "Split text fields into chunks based on the specified method.", + "type": "object", + "oneOf": [ + { + "title": "By Separator", + "type": "object", + "properties": { + "mode": { + "title": "Mode", + "default": "separator", + "const": "separator", + "enum": ["separator"], + "type": "string" + }, + "separators": { + "title": "Separators", + "description": "List of separator strings to split text fields by. The separator itself needs to be wrapped in double quotes, e.g. to split by the dot character, use \".\". 
To split by a newline, use \"\\n\".", + "default": ["\"\\n\\n\"", "\"\\n\"", "\" \"", "\"\""], + "type": "array", + "items": { + "type": "string" + } + }, + "keep_separator": { + "title": "Keep separator", + "description": "Whether to keep the separator in the resulting chunks", + "default": false, + "type": "boolean" + } + }, + "description": "Split the text by the list of separators until the chunk size is reached, using the earlier mentioned separators where possible. This is useful for splitting text fields by paragraphs, sentences, words, etc.", + "required": ["mode"] + }, + { + "title": "By Markdown header", + "type": "object", + "properties": { + "mode": { + "title": "Mode", + "default": "markdown", + "const": "markdown", + "enum": ["markdown"], + "type": "string" + }, + "split_level": { + "title": "Split level", + "description": "Level of markdown headers to split text fields by. Headings down to the specified level will be used as split points", + "default": 1, + "minimum": 1, + "maximum": 6, + "type": "integer" + } + }, + "description": "Split the text by Markdown headers down to the specified header level. If the chunk size fits multiple sections, they will be combined into a single chunk.", + "required": ["mode"] + }, + { + "title": "By Programming Language", + "type": "object", + "properties": { + "mode": { + "title": "Mode", + "default": "code", + "const": "code", + "enum": ["code"], + "type": "string" + }, + "language": { + "title": "Language", + "description": "Split code in suitable places based on the programming language", + "enum": [ + "cpp", + "go", + "java", + "js", + "php", + "proto", + "python", + "rst", + "ruby", + "rust", + "scala", + "swift", + "markdown", + "latex", + "html", + "sol" + ], + "type": "string" + } + }, + "required": ["language", "mode"], + "description": "Split the text by suitable delimiters based on the programming language. This is useful for splitting code into chunks." + } + ] + }, + "field_name_mappings": { + "title": "Field name mappings", + "description": "List of fields to rename. Not applicable for nested fields, but can be used to rename fields already flattened via dot notation.", + "default": [], + "type": "array", + "items": { + "title": "FieldNameMappingConfigModel", + "type": "object", + "properties": { + "from_field": { + "title": "From field name", + "description": "The field name in the source", + "type": "string" + }, + "to_field": { + "title": "To field name", + "description": "The field name to use in the destination", + "type": "string" + } + }, + "required": ["from_field", "to_field"] + } + } + }, + "required": ["chunk_size"], + "group": "processing" + }, + "omit_raw_text": { + "title": "Do not store raw text", + "description": "Do not store the text that gets embedded along with the vector and the metadata in the destination. 
If set to true, only the vector and the metadata will be stored - in this case raw text for LLM use cases needs to be retrieved from another source.", + "default": false, + "group": "advanced", + "type": "boolean" + }, "indexing": { "title": "Indexing", "type": "object", @@ -419,8 +432,8 @@ "type": "string" } }, - "required": ["mode"], - "description": "Do not authenticate (suitable for locally running test clusters, do not use for clusters with public IP addresses)" + "description": "Do not authenticate (suitable for locally running test clusters, do not use for clusters with public IP addresses)", + "required": ["mode"] } ] }, @@ -436,6 +449,13 @@ "default": "text", "type": "string" }, + "tenant_id": { + "title": "Tenant ID", + "description": "The tenant ID to use for multi tenancy", + "airbyte_secret": true, + "default": "", + "type": "string" + }, "default_vectorizer": { "title": "Default Vectorizer", "description": "The vectorizer to use if new classes need to be created", @@ -457,14 +477,20 @@ "description": "Additional HTTP headers to send with every request.", "default": [], "examples": [ - { "header_key": "X-OpenAI-Api-Key", "value": "my-openai-api-key" } + { + "header_key": "X-OpenAI-Api-Key", + "value": "my-openai-api-key" + } ], "type": "array", "items": { "title": "Header", "type": "object", "properties": { - "header_key": { "title": "Header Key", "type": "string" }, + "header_key": { + "title": "Header Key", + "type": "string" + }, "value": { "title": "Header Value", "airbyte_secret": true, @@ -478,21 +504,26 @@ "required": ["host", "auth"], "group": "indexing", "description": "Indexing configuration" - }, - "omit_raw_text": { - "title": "Do not store raw text", - "description": "Do not store the text that gets embedded along with the vector and the metadata in the destination. 
If set to true, only the vector and the metadata will be stored - in this case raw text for LLM use cases needs to be retrieved from another source.", - "default": false, - "group": "advanced", - "type": "boolean" } }, "required": ["embedding", "processing", "indexing"], "groups": [ - { "id": "processing", "title": "Processing" }, - { "id": "embedding", "title": "Embedding" }, - { "id": "indexing", "title": "Indexing" }, - { "id": "advanced", "title": "Advanced" } + { + "id": "processing", + "title": "Processing" + }, + { + "id": "embedding", + "title": "Embedding" + }, + { + "id": "indexing", + "title": "Indexing" + }, + { + "id": "advanced", + "title": "Advanced" + } ] }, "supportsIncremental": true, diff --git a/airbyte-integrations/connectors/destination-weaviate/metadata.yaml b/airbyte-integrations/connectors/destination-weaviate/metadata.yaml index f519260f42f7..29aeefaf0831 100644 --- a/airbyte-integrations/connectors/destination-weaviate/metadata.yaml +++ b/airbyte-integrations/connectors/destination-weaviate/metadata.yaml @@ -13,7 +13,7 @@ data: connectorSubtype: vectorstore connectorType: destination definitionId: 7b7d7a0d-954c-45a0-bcfc-39a634b97736 - dockerImageTag: 0.2.13 + dockerImageTag: 0.2.14 dockerRepository: airbyte/destination-weaviate documentationUrl: https://docs.airbyte.com/integrations/destinations/weaviate githubIssueLabel: destination-weaviate diff --git a/airbyte-integrations/connectors/destination-weaviate/unit_tests/indexer_test.py b/airbyte-integrations/connectors/destination-weaviate/unit_tests/indexer_test.py index 043a4b6ea68c..a5b2526e392c 100644 --- a/airbyte-integrations/connectors/destination-weaviate/unit_tests/indexer_test.py +++ b/airbyte-integrations/connectors/destination-weaviate/unit_tests/indexer_test.py @@ -71,6 +71,32 @@ def test_pre_sync_that_creates_class(self, MockClient): } ) + @patch("destination_weaviate.indexer.weaviate.Client") + def test_pre_sync_that_creates_class_with_multi_tenancy_enabled(self, MockClient): + mock_client = Mock() + self.config.tenant_id = "test_tenant" + mock_client.schema.get_class_tenants.return_value = [] + mock_client.schema.get.return_value = {"classes": []} + MockClient.return_value = mock_client + self.indexer.pre_sync(self.mock_catalog) + mock_client.schema.create_class.assert_called_with( + { + "class": "Test", + "multiTenancyConfig": {"enabled": True}, + "vectorizer": "none", + "properties": [ + { + "name": "_ab_record_id", + "dataType": ["text"], + "description": "Record ID, used for bookkeeping.", + "indexFilterable": True, + "indexSearchable": False, + "tokenization": "field", + } + ], + } + ) + @patch("destination_weaviate.indexer.weaviate.Client") def test_pre_sync_that_deletes(self, MockClient): mock_client = Mock() @@ -104,6 +130,19 @@ def test_index_deletes_by_record_id(self): where={"path": ["_ab_record_id"], "operator": "ContainsAny", "valueStringArray": ["some_id", "some_other_id"]}, ) + def test_index_deletes_by_record_id_with_tenant_id(self): + mock_client = Mock() + self.config.tenant_id = "test_tenant" + self.indexer.client = mock_client + self.indexer.has_record_id_metadata = defaultdict(None) + self.indexer.has_record_id_metadata["Test"] = True + self.indexer.delete(["some_id", "some_other_id"], None, "test") + mock_client.batch.delete_objects.assert_called_with( + class_name="Test", + tenant="test_tenant", + where={"path": ["_ab_record_id"], "operator": "ContainsAny", "valueStringArray": ["some_id", "some_other_id"]}, + ) + @patch("destination_weaviate.indexer.weaviate.Client") def 
test_index_not_delete_no_metadata_field(self, MockClient): mock_client = Mock() @@ -200,31 +239,39 @@ def test_index_flushes_batch_and_normalizes(self): page_content="some_content", embedding=[1, 2, 3], metadata={ - "someField": "some_value", "complex": {"a": [1, 2, 3]}, "UPPERCASE_NAME": "abc", "id": 12, "empty_list": [], - "referral Agency Name": "test1", - "123StartsWithNumber": "test2", - "special&*chars": "test3", - "with spaces": "test4", - "": "test5", - "_startsWithUnderscore": "test6", - "multiple spaces": "test7", - "SpecialCharacters!@#": "test8" - }, + "someField": "some_value", + "complex": {"a": [1, 2, 3]}, + "UPPERCASE_NAME": "abc", + "id": 12, + "empty_list": [], + "referral Agency Name": "test1", + "123StartsWithNumber": "test2", + "special&*chars": "test3", + "with spaces": "test4", + "": "test5", + "_startsWithUnderscore": "test6", + "multiple spaces": "test7", + "SpecialCharacters!@#": "test8", + }, record=AirbyteRecordMessage(stream="test", data={"someField": "some_value"}, emitted_at=0), ) self.indexer.index([mock_chunk], None, "test") mock_client.batch.add_data_object.assert_called_with( - {"someField": "some_value", "complex": '{"a": [1, 2, 3]}', "uPPERCASE_NAME": "abc", "text": "some_content", "raw_id": 12, - "referral_Agency_Name": "test1", + { + "someField": "some_value", + "complex": '{"a": [1, 2, 3]}', + "uPPERCASE_NAME": "abc", + "text": "some_content", + "raw_id": 12, + "referral_Agency_Name": "test1", "_123StartsWithNumber": "test2", "specialchars": "test3", "with_spaces": "test4", "_": "test5", "_startsWithUnderscore": "test6", "multiple__spaces": "test7", - "specialCharacters": "test8" - - }, + "specialCharacters": "test8", + }, "Test", ANY, vector=[1, 2, 3], diff --git a/docs/integrations/destinations/weaviate.md b/docs/integrations/destinations/weaviate.md index ed98586015b0..c8acb02b50de 100644 --- a/docs/integrations/destinations/weaviate.md +++ b/docs/integrations/destinations/weaviate.md @@ -79,10 +79,13 @@ You can also create the class in Weaviate in advance if you need more control ov As properties have to start with a lowercase letter in Weaviate and can't contain spaces or special characters, field names might be updated during the loading process. The field names `id`, `_id` and `_additional` are reserved keywords in Weaviate, so they will be renamed to `raw_id`, `raw__id` and `raw_additional` respectively. +When using [multi-tenancy](https://weaviate.io/developers/weaviate/manage-data/multi-tenancy), the tenant id can be configured in the connector configuration. If not specified, multi-tenancy will be disabled. If you want to index into an already created class, make sure the class was created with multi-tenancy enabled. If the class doesn't exist, it will be created with multi-tenancy properly configured. If the class already exists but the tenant id is not associated with the class, the connector will automatically add the tenant id to the class. This allows you to configure multiple connections for different tenants on the same schema.
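To make that behavior concrete, here is a minimal sketch of a tenant-aware pre-sync step using the `weaviate` Python client, mirroring the calls asserted in the unit tests above. This is not the connector's actual implementation: the function name, class name, and tenant id are illustrative, and the `add_class_tenants`/`Tenant` APIs are assumed from weaviate-client 3.22+.

```python
import weaviate
from weaviate import Tenant  # assumption: tenant APIs from weaviate-client >= 3.22


def ensure_class_for_tenant(client: weaviate.Client, class_name: str, tenant_id: str) -> None:
    """Sketch: create the class with multi-tenancy enabled, or attach the tenant to an existing class."""
    existing = {c["class"] for c in client.schema.get().get("classes", [])}
    if class_name not in existing:
        # A missing class is created with multi-tenancy enabled when a tenant id is configured.
        client.schema.create_class(
            {
                "class": class_name,
                "multiTenancyConfig": {"enabled": True},
                "vectorizer": "none",
            }
        )
    # If the class exists but the tenant is not associated with it yet, add the tenant.
    tenants = {t.name for t in client.schema.get_class_tenants(class_name)}
    if tenant_id not in tenants:
        client.schema.add_class_tenants(class_name, [Tenant(name=tenant_id)])
```

Subsequent writes and deletes are then scoped to the tenant, e.g. `client.batch.delete_objects(class_name="Test", tenant="test_tenant", where=...)`, exactly as the unit tests above assert.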
+ ## Changelog | Version | Date | Pull Request | Subject | | :------ | :--------- | :--------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------- | +| 0.2.14 | 2024-01-15 | [34229](https://github.com/airbytehq/airbyte/pull/34229) | Allow configuring tenant id | | 0.2.13 | 2023-12-11 | [33303](https://github.com/airbytehq/airbyte/pull/33303) | Fix bug with embedding special tokens | | 0.2.12 | 2023-12-07 | [33218](https://github.com/airbytehq/airbyte/pull/33218) | Normalize metadata field names | | 0.2.11 | 2023-12-01 | [32697](https://github.com/airbytehq/airbyte/pull/32697) | Allow omitting raw text | From a6f4c2e11dd820e8a7d5532eedefa92c57d96c3a Mon Sep 17 00:00:00 2001 From: Anton Karpets Date: Wed, 17 Jan 2024 15:56:58 +0200 Subject: [PATCH 06/12] =?UTF-8?q?=F0=9F=90=9BSource=20Amazon=20Seller=20Pa?= =?UTF-8?q?rtner:=20delete=20deprecated=20streams=20(#34283)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../acceptance-test-config.yml | 4 -- ...og_brand_analytics_alternate_purchase.json | 40 ------------------- ...talog_brand_analytics_item_comparison.json | 40 ------------------- .../integration_tests/expected_records.jsonl | 19 ++++----- .../integration_tests/future_state.json | 22 ---------- .../metadata.yaml | 2 +- ...D_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json | 36 ----------------- ...RAND_ANALYTICS_ITEM_COMPARISON_REPORT.json | 36 ----------------- .../source_amazon_seller_partner/source.py | 4 -- .../source_amazon_seller_partner/spec.json | 2 - .../source_amazon_seller_partner/streams.py | 10 ----- .../sources/amazon-seller-partner.md | 3 +- 12 files changed, 10 insertions(+), 208 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml index bd284f016347..e94239156b64 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml @@ -91,8 +91,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_FBA_SNS_FORECAST_DATA bypass_reason: "no records" - - name: GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT - bypass_reason: "no records" - name: GET_AFN_INVENTORY_DATA bypass_reason: "no records" - name: GET_MERCHANT_CANCELLED_LISTINGS_DATA bypass_reason: "no records" @@ -101,8 +99,6 @@ - name: GET_LEDGER_SUMMARY_VIEW_DATA bypass_reason: "no records" - - name: GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT - bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT bypass_reason: "no records" diff --git 
a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json deleted file mode 100644 index 2ce8fbb81064..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_alternate_purchase.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT", - "json_schema": { - "title": "Brand Analytics Alternate Purchase Reports", - "description": "Brand Analytics Alternate Purchase Reports", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "startDate": { - "type": ["null", "string"], - "format": "date" - }, - "endDate": { - "type": ["null", "string"], - "format": "date" - }, - "asin": { - "type": ["null", "string"] - }, - "purchasedAsin": { - "type": ["null", "string"] - }, - "purchasedRank": { - "type": ["null", "integer"] - }, - "purchasedPct": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json deleted file mode 100644 index 4d7300e63157..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_brand_analytics_item_comparison.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT", - "json_schema": { - "title": "Brand Analytics Item Comparison Reports", - "description": "Brand Analytics Item Comparison Reports", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "startDate": { - "type": ["null", "string"], - "format": "date" - }, - "endDate": { - "type": ["null", "string"], - "format": "date" - }, - "asin": { - "type": ["null", "string"] - }, - "comparedAsin": { - "type": ["null", "string"] - }, - "comparedRank": { - "type": ["null", "integer"] - }, - "comparedPct": { - "type": ["null", "number"] - } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/expected_records.jsonl index 014937625c8d..c36b190c842b 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/expected_records.jsonl @@ -43,17 +43,14 @@ {"stream": "OrderItems", "data": {"TaxCollection": {"Model": "MarketplaceFacilitator", "ResponsibleParty": "Amazon Services, Inc."}, "ProductInfo": {"NumberOfItems": "1"}, "BuyerInfo": {}, "ItemTax": {"CurrencyCode": "USD", "Amount": "0.00"}, "QuantityShipped": 0, "BuyerRequestedCancel": {"IsBuyerRequestedCancel": "false", "BuyerCancelReason": ""}, "ItemPrice": {"CurrencyCode": "USD", "Amount": "14.00"}, 
"ASIN": "B074K5MDLW", "SellerSKU": "2J-D6V7-C8XI", "Title": "Beyond Meat Beyond Burger Plant-Based Patties 2 pk, 8 oz (Frozen)", "IsGift": "false", "ConditionSubtypeId": "New", "IsTransparency": false, "QuantityOrdered": 0, "PromotionDiscountTax": {"CurrencyCode": "USD", "Amount": "0.00"}, "ConditionId": "New", "PromotionDiscount": {"CurrencyCode": "USD", "Amount": "0.00"}, "OrderItemId": "64356568394218", "LastUpdateDate": "2022-07-29T08:19:16Z", "AmazonOrderId": "113-8871452-8288246"}, "emitted_at": 1701969243138} {"stream": "GET_RESTOCK_INVENTORY_RECOMMENDATIONS_REPORT", "data": {"Country": "US", "Product Name": "Airbyte T-Shirt Black", "FNSKU": "X0041NMBPF", "Merchant SKU": "IA-VREM-8L92", "ASIN": "B0CJ5Q3NLP", "Condition": "New", "Supplier": "unassigned", "Supplier part no.": "", "Currency code": "USD", "Price": "15.00", "Sales last 30 days": "0.0", "Units Sold Last 30 Days": "0", "Total Units": "0", "Inbound": "0", "Available": "0", "FC transfer": "0", "FC Processing": "0", "Customer Order": "0", "Unfulfillable": "0", "Working": "0", "Shipped": "0", "Receiving": "0", "Fulfilled by": "Amazon", "Total Days of Supply (including units from open shipments)": "", "Days of Supply at Amazon Fulfillment Network": "", "Alert": "out_of_stock", "Recommended replenishment qty": "0", "Recommended ship date": "none", "Recommended action": "No action required", "Unit storage size": "", "dataEndTime": "2022-07-31"}, "emitted_at": 1701969512824} {"stream": "GET_RESTOCK_INVENTORY_RECOMMENDATIONS_REPORT", "data": {"Country": "US", "Product Name": "Airbyte Merch White", "FNSKU": "X003X1FG67", "Merchant SKU": "KW-J7BQ-WNKL", "ASIN": "B0CDLLJ5VV", "Condition": "New", "Supplier": "unassigned", "Supplier part no.": "", "Currency code": "USD", "Price": "10.00", "Sales last 30 days": "0.0", "Units Sold Last 30 Days": "0", "Total Units": "0", "Inbound": "0", "Available": "0", "FC transfer": "0", "FC Processing": "0", "Customer Order": "0", "Unfulfillable": "0", "Working": "0", "Shipped": "0", "Receiving": "0", "Fulfilled by": "Amazon", "Total Days of Supply (including units from open shipments)": "", "Days of Supply at Amazon Fulfillment Network": "", "Alert": "out_of_stock", "Recommended replenishment qty": "0", "Recommended ship date": "none", "Recommended action": "No action required", "Unit storage size": "0.1736 ft3", "dataEndTime": "2022-07-31"}, "emitted_at": 1701969512826} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": "2023-10-16T22:51:31+00:00", "settlement-end-date": "2023-11-13T22:51:31+00:00", "deposit-date": "2023-11-15T22:51:31+00:00", "total-amount": "-39.99", "currency": "USD", "transaction-type": "", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": null, "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-11-13"}, "emitted_at": 1701969629629} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": null, "settlement-end-date": 
null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Payable to Amazon", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-10-16T22:51:31+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "-27.54", "dataEndTime": "2023-11-13"}, "emitted_at": 1701969629631} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Subscription Fee", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-11-09T18:44:35+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "-39.99", "dataEndTime": "2023-11-13"}, "emitted_at": 1701969629631} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Successful charge", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-10-17T00:01:09+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "27.54 ", "dataEndTime": "2023-11-13"}, "emitted_at": 1701969629632} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18834943411", "settlement-start-date": "2023-10-02T22:51:31+00:00", "settlement-end-date": "2023-10-16T22:51:31+00:00", "deposit-date": "2023-10-18T22:51:31+00:00", "total-amount": "-27.54", "currency": "USD", "transaction-type": "", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": null, "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", 
"misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-10-16"}, "emitted_at": 1701969660859} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18834943411", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Subscription Fee", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-10-09T20:49:19+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "-39.99", "dataEndTime": "2023-10-16"}, "emitted_at": 1701969660860} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18834943411", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Previous Reserve Amount Balance", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-10-02T22:58:21+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "12.45", "dataEndTime": "2023-10-16"}, "emitted_at": 1701969660861} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18654297941", "settlement-start-date": "2023-09-18T22:51:31+00:00", "settlement-end-date": "2023-10-02T22:51:31+00:00", "deposit-date": "2023-10-04T22:51:31+00:00", "total-amount": "0.00", "currency": "USD", "transaction-type": "", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": null, "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-10-02"}, "emitted_at": 1701969692191} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18654297941", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Order", "order-id": "111-1308361-8778604", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "D7vNnKlKr", "marketplace-name": 
"Amazon.com", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "MFN", "posted-date": "2023-09-26T12:06:28+00:00", "order-item-code": "85435093931281", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "IA-VREM-8L92", "quantity-purchased": "1", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-10-02"}, "emitted_at": 1701969692192} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18654297941", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Order", "order-id": "111-1308361-8778604", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "D7vNnKlKr", "marketplace-name": "Amazon.com", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "MFN", "posted-date": "2023-09-26T12:06:28+00:00", "order-item-code": "85435093931281", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "IA-VREM-8L92", "quantity-purchased": "", "price-type": "Principal", "price-amount": "15.00", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-10-02"}, "emitted_at": 1701969692192} -{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18654297941", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Order", "order-id": "111-1308361-8778604", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "D7vNnKlKr", "marketplace-name": "Amazon.com", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "MFN", "posted-date": "2023-09-26T12:06:28+00:00", "order-item-code": "85435093931281", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "IA-VREM-8L92", "quantity-purchased": "", "price-type": "Tax", "price-amount": "0.86", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-10-02"}, "emitted_at": 1701969692193} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "19009771651", "settlement-start-date": "2023-11-13T22:51:31+00:00", "settlement-end-date": "2023-12-11T22:51:31+00:00", "deposit-date": "2023-12-13T22:51:31+00:00", "total-amount": "-39.99", "currency": "USD", "transaction-type": "", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": null, "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", 
"other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-12-11"}, "emitted_at": 1705396604115} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "19009771651", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Payable to Amazon", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-11-13T22:51:31+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "-39.99", "dataEndTime": "2023-12-11"}, "emitted_at": 1705396604117} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "19009771651", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Subscription Fee", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-12-09T20:02:53+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "-39.99", "dataEndTime": "2023-12-11"}, "emitted_at": 1705396604118} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "19009771651", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Successful charge", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-11-13T23:51:01+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "39.99 ", "dataEndTime": "2023-12-11"}, "emitted_at": 1705396604118} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": "2023-10-16T22:51:31+00:00", "settlement-end-date": "2023-11-13T22:51:31+00:00", "deposit-date": "2023-11-15T22:51:31+00:00", "total-amount": "-39.99", "currency": "USD", "transaction-type": "", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": 
"", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": null, "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "", "dataEndTime": "2023-11-13"}, "emitted_at": 1705396605853} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Payable to Amazon", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-10-16T22:51:31+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "-27.54", "dataEndTime": "2023-11-13"}, "emitted_at": 1705396605855} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Subscription Fee", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-11-09T18:44:35+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "-39.99", "dataEndTime": "2023-11-13"}, "emitted_at": 1705396605856} +{"stream": "GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE", "data": {"settlement-id": "18923842351", "settlement-start-date": null, "settlement-end-date": null, "deposit-date": null, "total-amount": "", "currency": "", "transaction-type": "Successful charge", "order-id": "", "merchant-order-id": "", "adjustment-id": "", "shipment-id": "", "marketplace-name": "", "shipment-fee-type": "", "shipment-fee-amount": "", "order-fee-type": "", "order-fee-amount": "", "fulfillment-id": "", "posted-date": "2023-10-17T00:01:09+00:00", "order-item-code": "", "merchant-order-item-id": "", "merchant-adjustment-item-id": "", "sku": "", "quantity-purchased": "", "price-type": "", "price-amount": "", "item-related-fee-type": "", "item-related-fee-amount": "", "misc-fee-amount": "", "other-fee-amount": "", "other-fee-reason-description": "", "direct-payment-type": "", "direct-payment-amount": "", "other-amount": "27.54 ", "dataEndTime": "2023-11-13"}, "emitted_at": 1705396605857} {"stream": "GET_MERCHANT_LISTINGS_DATA_BACK_COMPAT", "data": {"item-name": "GiftBox", "item-description": 
"Monitor and optimize the GiftBox to reward your customers and increase the average order value", "listing-id": "0711ZJUYPNS", "seller-sku": "I0-RALD-N1UR", "price": "5", "quantity": "1000", "open-date": "2022-07-11T01:34:18-07:00", "image-url": "", "item-is-marketplace": "y", "product-id-type": "1", "zshop-shipping-fee": "", "item-note": "", "item-condition": "11", "zshop-category1": "", "zshop-browse-path": "", "zshop-storefront-feature": "", "asin1": "B0B68NBQ1Y", "asin2": "", "asin3": "", "will-ship-internationally": "", "expedited-shipping": "", "zshop-boldface": "", "product-id": "B0B68NBQ1Y", "bid-for-featured-placement": "", "add-delete": "", "pending-quantity": "0", "Business Price": "6.0", "Quantity Price Type": "", "Quantity Lower Bound 1": "", "Quantity Price 1": "", "Quantity Lower Bound 2": "", "Quantity Price 2": "", "Quantity Lower Bound 3": "", "Quantity Price 3": "", "Quantity Lower Bound 4": "", "Quantity Price 4": "", "Quantity Lower Bound 5": "", "Quantity Price 5": "", "merchant-shipping-group": "Migrated Template", "Progressive Price Type": "", "Progressive Lower Bound 1": "", "Progressive Price 1": "", "Progressive Lower Bound 2": "", "Progressive Price 2": "", "Progressive Lower Bound 3": "", "Progressive Price 3": "", "dataEndTime": "2022-07-31"}, "emitted_at": 1701976405556} {"stream": "ListFinancialEvents", "data": {"ShipmentEventList": [], "ShipmentSettleEventList": [], "RefundEventList": [], "GuaranteeClaimEventList": [], "ChargebackEventList": [], "PayWithAmazonEventList": [], "ServiceProviderCreditEventList": [], "RetrochargeEventList": [], "RentalTransactionEventList": [], "PerformanceBondRefundEventList": [], "ProductAdsPaymentEventList": [{"postedDate": "2022-07-28T20:06:07Z", "transactionType": "Charge", "invoiceId": "TR1T7Z7DR-1", "baseValue": {"CurrencyCode": "USD", "CurrencyAmount": -9.08}, "taxValue": {"CurrencyCode": "USD", "CurrencyAmount": 0.0}, "transactionValue": {"CurrencyCode": "USD", "CurrencyAmount": -9.08}}], "ServiceFeeEventList": [], "SellerDealPaymentEventList": [], "DebtRecoveryEventList": [], "LoanServicingEventList": [], "AdjustmentEventList": [], "SAFETReimbursementEventList": [], "SellerReviewEnrollmentPaymentEventList": [], "FBALiquidationEventList": [], "CouponPaymentEventList": [], "ImagingServicesFeeEventList": [], "NetworkComminglingTransactionEventList": [], "AffordabilityExpenseEventList": [], "AffordabilityExpenseReversalEventList": [], "RemovalShipmentEventList": [], "RemovalShipmentAdjustmentEventList": [], "TrialShipmentEventList": [], "TDSReimbursementEventList": [], "AdhocDisbursementEventList": [], "TaxWithholdingEventList": [], "ChargeRefundEventList": [], "FailedAdhocDisbursementEventList": [], "ValueAddedServiceChargeEventList": [], "CapacityReservationBillingEventList": [], "PostedBefore": "2022-07-31T00:00:00Z"}, "emitted_at": 1701976465145} {"stream": "ListFinancialEventGroups", "data": {"FinancialEventGroupId": "6uFLEEa3LQgyvcccMnVQ4Bj-I5zkOVNoM41q8leJzLk", "ProcessingStatus": "Closed", "FundTransferStatus": "Unknown", "OriginalTotal": {"CurrencyCode": "USD", "CurrencyAmount": -58.86}, "FundTransferDate": "2022-08-08T22:51:31Z", "BeginningBalance": {"CurrencyCode": "USD", "CurrencyAmount": -39.99}, "FinancialEventGroupStart": "2021-07-26T22:51:30Z", "FinancialEventGroupEnd": "2022-08-08T22:51:31Z"}, "emitted_at": 1701976502869} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/future_state.json 
b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/future_state.json index dcdfa0d0f38c..9bf0b19e4d41 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/future_state.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/future_state.json @@ -32,28 +32,6 @@ } } }, - { - "type": "STREAM", - "stream": { - "stream_state": { - "dataEndTime": "2121-07-01" - }, - "stream_descriptor": { - "name": "GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT" - } - } - }, - { - "type": "STREAM", - "stream": { - "stream_state": { - "dataEndTime": "2121-07-01" - }, - "stream_descriptor": { - "name": "GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT" - } - } - }, { "type": "STREAM", "stream": { diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml index d0c077853b9f..e6a6b883ff35 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml @@ -15,7 +15,7 @@ data: connectorSubtype: api connectorType: source definitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 - dockerImageTag: 3.0.1 + dockerImageTag: 3.1.0 dockerRepository: airbyte/source-amazon-seller-partner documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-seller-partner githubIssueLabel: source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json deleted file mode 100644 index 1714a688b4de..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "title": "Brand Analytics Alternate Purchase Reports", - "description": "Brand Analytics Alternate Purchase Reports", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "startDate": { - "type": ["null", "string"], - "format": "date" - }, - "endDate": { - "type": ["null", "string"], - "format": "date" - }, - "asin": { - "type": ["null", "string"] - }, - "purchasedAsin": { - "type": ["null", "string"] - }, - "purchasedRank": { - "type": ["null", "integer"] - }, - "purchasedPct": { - "type": ["null", "number"] - }, - "dataEndTime": { - "type": ["null", "string"], - "format": "date" - }, - "queryEndDate": { - "type": ["null", "string"], - "format": "date" - } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json deleted file mode 100644 index ee646b8a14c8..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "title": "Brand Analytics Item Comparison Reports", - "description": "Brand Analytics Item Comparison Reports", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "startDate": { 
- "type": ["null", "string"], - "format": "date" - }, - "endDate": { - "type": ["null", "string"], - "format": "date" - }, - "asin": { - "type": ["null", "string"] - }, - "comparedAsin": { - "type": ["null", "string"] - }, - "comparedRank": { - "type": ["null", "integer"] - }, - "comparedPct": { - "type": ["null", "number"] - }, - "dataEndTime": { - "type": ["null", "string"], - "format": "date" - }, - "queryEndDate": { - "type": ["null", "string"], - "format": "date" - } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py index 87cfc34e636a..6f7c6a4b9c5f 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py @@ -15,8 +15,6 @@ from source_amazon_seller_partner.auth import AWSAuthenticator from source_amazon_seller_partner.constants import get_marketplaces from source_amazon_seller_partner.streams import ( - BrandAnalyticsAlternatePurchaseReports, - BrandAnalyticsItemComparisonReports, BrandAnalyticsMarketBasketReports, BrandAnalyticsRepeatPurchaseReports, BrandAnalyticsSearchTermsReports, @@ -187,8 +185,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: BrandAnalyticsMarketBasketReports, BrandAnalyticsSearchTermsReports, BrandAnalyticsRepeatPurchaseReports, - BrandAnalyticsAlternatePurchaseReports, - BrandAnalyticsItemComparisonReports, SellerAnalyticsSalesAndTrafficReports, VendorSalesReports, VendorInventoryReports, diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json index a74cf6ea9079..9f84b550d8e7 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json @@ -132,8 +132,6 @@ "GET_AFN_INVENTORY_DATA", "GET_AFN_INVENTORY_DATA_BY_COUNTRY", "GET_AMAZON_FULFILLED_SHIPMENTS_DATA_GENERAL", - "GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT", - "GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT", "GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT", "GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT", "GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT", diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index d65c6db58077..c0d4afcb3c60 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -785,16 +785,6 @@ class BrandAnalyticsRepeatPurchaseReports(IncrementalAnalyticsStream): result_key = "dataByAsin" -class BrandAnalyticsAlternatePurchaseReports(IncrementalAnalyticsStream): - name = "GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT" - result_key = "dataByAsin" - - -class BrandAnalyticsItemComparisonReports(IncrementalAnalyticsStream): - name = "GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT" - result_key = "dataByAsin" - - class VendorInventoryReports(IncrementalAnalyticsStream): """ Field definitions: 
https://developer-docs.amazon.com/sp-api/docs/report-type-values#vendor-retail-analytics-reports diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index 1d3f2606e3b7..7bcb6d06f3ed 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -77,8 +77,6 @@ The Amazon Seller Partner source connector supports the following [sync modes](h - [Active Listings Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-inventory) \(incremental\) - [All Listings Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-inventory) \(incremental\) - [Amazon Search Terms Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-analytics#brand-analytics-reports) \(only available in OSS, incremental\) -- [Brand Analytics Alternate Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-analytics#brand-analytics-reports) \(only available in OSS, incremental\) -- [Brand Analytics Item Comparison Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-analytics#brand-analytics-reports) \(only available in OSS, incremental\) - [Browse Tree Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-browse-tree) \(incremental\) - [Canceled Listings Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-inventory) \(incremental\) - [FBA Amazon Fulfilled Inventory Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values-fba#fba-inventory-reports) \(incremental\) @@ -155,6 +153,7 @@ Information about rate limits you may find [here](https://developer-docs.amazon. | Version | Date | Pull Request | Subject | |:---------|:-----------|:------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `3.1.0` | 2024-01-17 | [\#34283](https://github.com/airbytehq/airbyte/pull/34283) | Delete deprecated streams | | `3.0.1` | 2023-12-22 | [\#33741](https://github.com/airbytehq/airbyte/pull/33741) | Improve report streams performance | | `3.0.0` | 2023-12-12 | [\#32977](https://github.com/airbytehq/airbyte/pull/32977) | Make all streams incremental | | `2.5.0` | 2023-11-27 | [\#32505](https://github.com/airbytehq/airbyte/pull/32505) | Make report options configurable via UI | From 03c725b9506cdc393b0f1d65888088b49d73fd83 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Wed, 17 Jan 2024 14:58:26 +0100 Subject: [PATCH 07/12] Unify lowcode tags (#34284) --- .../source-configuration-based/metadata.yaml.hbs | 2 +- .../connectors/source-apify-dataset/metadata.yaml | 2 +- airbyte-integrations/connectors/source-appfollow/metadata.yaml | 2 +- airbyte-integrations/connectors/source-auth0/metadata.yaml | 2 +- airbyte-integrations/connectors/source-babelforce/metadata.yaml | 2 +- airbyte-integrations/connectors/source-chargify/metadata.yaml | 2 +- airbyte-integrations/connectors/source-clockify/metadata.yaml | 2 +- .../connectors/source-commercetools/metadata.yaml | 2 +- airbyte-integrations/connectors/source-copper/metadata.yaml | 2 +- .../connectors/source-customer-io/metadata.yaml | 2 +- airbyte-integrations/connectors/source-dixa/metadata.yaml | 2 +- airbyte-integrations/connectors/source-dockerhub/metadata.yaml | 2 +- airbyte-integrations/connectors/source-drift/metadata.yaml | 2 +- 
.../connectors/source-exchange-rates/metadata.yaml | 2 +- airbyte-integrations/connectors/source-fastbill/metadata.yaml | 2 +- .../connectors/source-freshcaller/metadata.yaml | 2 +- .../connectors/source-freshservice/metadata.yaml | 2 +- airbyte-integrations/connectors/source-glassfrog/metadata.yaml | 2 +- airbyte-integrations/connectors/source-harness/metadata.yaml | 2 +- airbyte-integrations/connectors/source-hubplanner/metadata.yaml | 2 +- airbyte-integrations/connectors/source-insightly/metadata.yaml | 2 +- airbyte-integrations/connectors/source-lemlist/metadata.yaml | 2 +- airbyte-integrations/connectors/source-nasa/metadata.yaml | 2 +- airbyte-integrations/connectors/source-onesignal/metadata.yaml | 2 +- .../connectors/source-open-exchange-rates/metadata.yaml | 2 +- .../connectors/source-openweather/metadata.yaml | 2 +- airbyte-integrations/connectors/source-opsgenie/metadata.yaml | 2 +- airbyte-integrations/connectors/source-orbit/metadata.yaml | 2 +- airbyte-integrations/connectors/source-pagerduty/metadata.yaml | 2 +- airbyte-integrations/connectors/source-persistiq/metadata.yaml | 2 +- airbyte-integrations/connectors/source-pipedrive/metadata.yaml | 2 +- airbyte-integrations/connectors/source-pokeapi/metadata.yaml | 2 +- .../connectors/source-public-apis/metadata.yaml | 2 +- airbyte-integrations/connectors/source-qonto/metadata.yaml | 2 +- airbyte-integrations/connectors/source-qualaroo/metadata.yaml | 2 +- airbyte-integrations/connectors/source-serpstat/metadata.yaml | 2 +- airbyte-integrations/connectors/source-shortio/metadata.yaml | 2 +- airbyte-integrations/connectors/source-todoist/metadata.yaml | 2 +- .../connectors/source-visma-economic/metadata.yaml | 2 +- airbyte-integrations/connectors/source-wrike/metadata.yaml | 2 +- airbyte-integrations/connectors/source-younium/metadata.yaml | 2 +- .../connectors/source-zendesk-sell/metadata.yaml | 2 +- airbyte-integrations/connectors/source-zenefits/metadata.yaml | 2 +- 43 files changed, 43 insertions(+), 43 deletions(-) diff --git a/airbyte-integrations/connector-templates/source-configuration-based/metadata.yaml.hbs b/airbyte-integrations/connector-templates/source-configuration-based/metadata.yaml.hbs index f162a414b44d..418482a0b246 100644 --- a/airbyte-integrations/connector-templates/source-configuration-based/metadata.yaml.hbs +++ b/airbyte-integrations/connector-templates/source-configuration-based/metadata.yaml.hbs @@ -26,5 +26,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/{{dashCase name}} tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-apify-dataset/metadata.yaml b/airbyte-integrations/connectors/source-apify-dataset/metadata.yaml index f3421f116b82..0e4e0668f3d4 100644 --- a/airbyte-integrations/connectors/source-apify-dataset/metadata.yaml +++ b/airbyte-integrations/connectors/source-apify-dataset/metadata.yaml @@ -29,5 +29,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/apify-dataset tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-appfollow/metadata.yaml b/airbyte-integrations/connectors/source-appfollow/metadata.yaml index e65309bd6e97..7e9cb43f0cac 100644 --- a/airbyte-integrations/connectors/source-appfollow/metadata.yaml +++ b/airbyte-integrations/connectors/source-appfollow/metadata.yaml @@ -20,7 +20,7 @@ data: releaseStage: alpha documentationUrl: 
https://docs.airbyte.com/integrations/sources/appfollow tags: - - language:lowcode + - language:low-code releases: breakingChanges: 1.0.0: diff --git a/airbyte-integrations/connectors/source-auth0/metadata.yaml b/airbyte-integrations/connectors/source-auth0/metadata.yaml index 5eb0080a09cb..9566ee9d7800 100644 --- a/airbyte-integrations/connectors/source-auth0/metadata.yaml +++ b/airbyte-integrations/connectors/source-auth0/metadata.yaml @@ -26,5 +26,5 @@ data: releaseStage: alpha supportLevel: community tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-babelforce/metadata.yaml b/airbyte-integrations/connectors/source-babelforce/metadata.yaml index 9425eae3c84b..f4f4e35ea4ba 100644 --- a/airbyte-integrations/connectors/source-babelforce/metadata.yaml +++ b/airbyte-integrations/connectors/source-babelforce/metadata.yaml @@ -20,7 +20,7 @@ data: releaseStage: alpha documentationUrl: https://docs.airbyte.com/integrations/sources/babelforce tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-chargify/metadata.yaml b/airbyte-integrations/connectors/source-chargify/metadata.yaml index f211a8bfecbc..cc211d82e70c 100644 --- a/airbyte-integrations/connectors/source-chargify/metadata.yaml +++ b/airbyte-integrations/connectors/source-chargify/metadata.yaml @@ -21,7 +21,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/chargify tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-clockify/metadata.yaml b/airbyte-integrations/connectors/source-clockify/metadata.yaml index f26a0e6f214c..90baae2689d8 100644 --- a/airbyte-integrations/connectors/source-clockify/metadata.yaml +++ b/airbyte-integrations/connectors/source-clockify/metadata.yaml @@ -21,7 +21,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/clockify tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-commercetools/metadata.yaml b/airbyte-integrations/connectors/source-commercetools/metadata.yaml index dcbb88d3fafe..5af0802c2c0e 100644 --- a/airbyte-integrations/connectors/source-commercetools/metadata.yaml +++ b/airbyte-integrations/connectors/source-commercetools/metadata.yaml @@ -20,7 +20,7 @@ data: releaseStage: alpha documentationUrl: https://docs.airbyte.com/integrations/sources/commercetools tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-copper/metadata.yaml b/airbyte-integrations/connectors/source-copper/metadata.yaml index cfa55715d0c4..283dd98e3a15 100644 --- a/airbyte-integrations/connectors/source-copper/metadata.yaml +++ b/airbyte-integrations/connectors/source-copper/metadata.yaml @@ -20,5 +20,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/copper tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-customer-io/metadata.yaml b/airbyte-integrations/connectors/source-customer-io/metadata.yaml index 548f40de65ea..3cf0252a2512 100644 --- a/airbyte-integrations/connectors/source-customer-io/metadata.yaml +++ b/airbyte-integrations/connectors/source-customer-io/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community 
documentationUrl: https://docs.airbyte.com/integrations/sources/customer-io tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-dixa/metadata.yaml b/airbyte-integrations/connectors/source-dixa/metadata.yaml index 2bd71f0c367a..9d45d0bb45f7 100644 --- a/airbyte-integrations/connectors/source-dixa/metadata.yaml +++ b/airbyte-integrations/connectors/source-dixa/metadata.yaml @@ -21,7 +21,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/dixa tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-dockerhub/metadata.yaml b/airbyte-integrations/connectors/source-dockerhub/metadata.yaml index 3afe52544412..e0b403736301 100644 --- a/airbyte-integrations/connectors/source-dockerhub/metadata.yaml +++ b/airbyte-integrations/connectors/source-dockerhub/metadata.yaml @@ -21,7 +21,7 @@ data: releaseStage: alpha documentationUrl: https://docs.airbyte.com/integrations/sources/dockerhub tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-drift/metadata.yaml b/airbyte-integrations/connectors/source-drift/metadata.yaml index f17ce487e236..12e3531ac2c3 100644 --- a/airbyte-integrations/connectors/source-drift/metadata.yaml +++ b/airbyte-integrations/connectors/source-drift/metadata.yaml @@ -21,7 +21,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/drift tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-exchange-rates/metadata.yaml b/airbyte-integrations/connectors/source-exchange-rates/metadata.yaml index dc7062891b8f..20d3fa38cb7c 100644 --- a/airbyte-integrations/connectors/source-exchange-rates/metadata.yaml +++ b/airbyte-integrations/connectors/source-exchange-rates/metadata.yaml @@ -22,5 +22,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/exchange-rates tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-fastbill/metadata.yaml b/airbyte-integrations/connectors/source-fastbill/metadata.yaml index 805d8dd61b3c..9a6e795f561d 100644 --- a/airbyte-integrations/connectors/source-fastbill/metadata.yaml +++ b/airbyte-integrations/connectors/source-fastbill/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/fastbill tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-freshcaller/metadata.yaml b/airbyte-integrations/connectors/source-freshcaller/metadata.yaml index 5d8dc4615aa9..ac2d4fb29261 100644 --- a/airbyte-integrations/connectors/source-freshcaller/metadata.yaml +++ b/airbyte-integrations/connectors/source-freshcaller/metadata.yaml @@ -17,7 +17,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/freshcaller tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-freshservice/metadata.yaml b/airbyte-integrations/connectors/source-freshservice/metadata.yaml index 672168762c1a..43c5dd48d13f 100644 --- a/airbyte-integrations/connectors/source-freshservice/metadata.yaml +++ 
b/airbyte-integrations/connectors/source-freshservice/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/freshservice tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-glassfrog/metadata.yaml b/airbyte-integrations/connectors/source-glassfrog/metadata.yaml index 051a5ef1d90c..b75ae80dbeb8 100644 --- a/airbyte-integrations/connectors/source-glassfrog/metadata.yaml +++ b/airbyte-integrations/connectors/source-glassfrog/metadata.yaml @@ -20,7 +20,7 @@ data: releaseStage: alpha documentationUrl: https://docs.airbyte.com/integrations/sources/glassfrog tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-harness/metadata.yaml b/airbyte-integrations/connectors/source-harness/metadata.yaml index 857504fc2505..a33d83b93859 100644 --- a/airbyte-integrations/connectors/source-harness/metadata.yaml +++ b/airbyte-integrations/connectors/source-harness/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/harness tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-hubplanner/metadata.yaml b/airbyte-integrations/connectors/source-hubplanner/metadata.yaml index ab23f4f28def..59487dacb6f1 100644 --- a/airbyte-integrations/connectors/source-hubplanner/metadata.yaml +++ b/airbyte-integrations/connectors/source-hubplanner/metadata.yaml @@ -21,7 +21,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/hubplanner tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-insightly/metadata.yaml b/airbyte-integrations/connectors/source-insightly/metadata.yaml index 321d751ffa7a..78f6fc859bf6 100644 --- a/airbyte-integrations/connectors/source-insightly/metadata.yaml +++ b/airbyte-integrations/connectors/source-insightly/metadata.yaml @@ -20,7 +20,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/insightly tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-lemlist/metadata.yaml b/airbyte-integrations/connectors/source-lemlist/metadata.yaml index 1b9d60189c19..cf4bd7c33ff8 100644 --- a/airbyte-integrations/connectors/source-lemlist/metadata.yaml +++ b/airbyte-integrations/connectors/source-lemlist/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/lemlist tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-nasa/metadata.yaml b/airbyte-integrations/connectors/source-nasa/metadata.yaml index f5fa8ac2e653..1cd017174495 100644 --- a/airbyte-integrations/connectors/source-nasa/metadata.yaml +++ b/airbyte-integrations/connectors/source-nasa/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/nasa tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-onesignal/metadata.yaml b/airbyte-integrations/connectors/source-onesignal/metadata.yaml index c63f53736d6d..cedb38e32be9 100644 --- 
a/airbyte-integrations/connectors/source-onesignal/metadata.yaml +++ b/airbyte-integrations/connectors/source-onesignal/metadata.yaml @@ -20,6 +20,6 @@ data: releaseDate: 2023-08-31 releaseStage: alpha tags: - - language:lowcode + - language:low-code supportLevel: community metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml b/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml index f93f96677127..7c5f21637fae 100644 --- a/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml +++ b/airbyte-integrations/connectors/source-open-exchange-rates/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/open-exchange-rates tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-openweather/metadata.yaml b/airbyte-integrations/connectors/source-openweather/metadata.yaml index 1e71bb97fc7f..b15d5744b4f9 100644 --- a/airbyte-integrations/connectors/source-openweather/metadata.yaml +++ b/airbyte-integrations/connectors/source-openweather/metadata.yaml @@ -20,5 +20,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/openweather tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-opsgenie/metadata.yaml b/airbyte-integrations/connectors/source-opsgenie/metadata.yaml index 4af87df040a9..17edbf7b36e5 100644 --- a/airbyte-integrations/connectors/source-opsgenie/metadata.yaml +++ b/airbyte-integrations/connectors/source-opsgenie/metadata.yaml @@ -15,7 +15,7 @@ data: releaseStage: alpha documentationUrl: https://docs.airbyte.com/integrations/sources/opsgenie tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-orbit/metadata.yaml b/airbyte-integrations/connectors/source-orbit/metadata.yaml index 2d19e453ccdc..5accdc194140 100644 --- a/airbyte-integrations/connectors/source-orbit/metadata.yaml +++ b/airbyte-integrations/connectors/source-orbit/metadata.yaml @@ -21,7 +21,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/orbit tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-pagerduty/metadata.yaml b/airbyte-integrations/connectors/source-pagerduty/metadata.yaml index 403e4bd393d7..0b471649a4a4 100644 --- a/airbyte-integrations/connectors/source-pagerduty/metadata.yaml +++ b/airbyte-integrations/connectors/source-pagerduty/metadata.yaml @@ -21,7 +21,7 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/pagerduty tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-persistiq/metadata.yaml b/airbyte-integrations/connectors/source-persistiq/metadata.yaml index ded7693cd2c9..2d06105f0138 100644 --- a/airbyte-integrations/connectors/source-persistiq/metadata.yaml +++ b/airbyte-integrations/connectors/source-persistiq/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/persistiq tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-pipedrive/metadata.yaml 
b/airbyte-integrations/connectors/source-pipedrive/metadata.yaml index 74b572e3fad6..5f0810ee4556 100644 --- a/airbyte-integrations/connectors/source-pipedrive/metadata.yaml +++ b/airbyte-integrations/connectors/source-pipedrive/metadata.yaml @@ -29,5 +29,5 @@ data: releaseStage: alpha supportLevel: community tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-pokeapi/metadata.yaml b/airbyte-integrations/connectors/source-pokeapi/metadata.yaml index 85047f016528..076a75a780a4 100644 --- a/airbyte-integrations/connectors/source-pokeapi/metadata.yaml +++ b/airbyte-integrations/connectors/source-pokeapi/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/pokeapi tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-public-apis/metadata.yaml b/airbyte-integrations/connectors/source-public-apis/metadata.yaml index 0df0a0c7da8e..c83aba288893 100644 --- a/airbyte-integrations/connectors/source-public-apis/metadata.yaml +++ b/airbyte-integrations/connectors/source-public-apis/metadata.yaml @@ -23,5 +23,5 @@ data: releaseStage: alpha supportLevel: community tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-qonto/metadata.yaml b/airbyte-integrations/connectors/source-qonto/metadata.yaml index 83239b6c7d13..05c0393aa6ec 100644 --- a/airbyte-integrations/connectors/source-qonto/metadata.yaml +++ b/airbyte-integrations/connectors/source-qonto/metadata.yaml @@ -13,5 +13,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/qonto tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-qualaroo/metadata.yaml b/airbyte-integrations/connectors/source-qualaroo/metadata.yaml index 7364d8abe5c3..e291b82f879a 100644 --- a/airbyte-integrations/connectors/source-qualaroo/metadata.yaml +++ b/airbyte-integrations/connectors/source-qualaroo/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/qualaroo tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-serpstat/metadata.yaml b/airbyte-integrations/connectors/source-serpstat/metadata.yaml index e764c9c80059..fb9d7de24737 100644 --- a/airbyte-integrations/connectors/source-serpstat/metadata.yaml +++ b/airbyte-integrations/connectors/source-serpstat/metadata.yaml @@ -18,5 +18,5 @@ data: releaseStage: alpha documentationUrl: https://docs.airbyte.com/integrations/sources/serpstat tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-shortio/metadata.yaml b/airbyte-integrations/connectors/source-shortio/metadata.yaml index ad64fec0533c..6eced88a6f02 100644 --- a/airbyte-integrations/connectors/source-shortio/metadata.yaml +++ b/airbyte-integrations/connectors/source-shortio/metadata.yaml @@ -23,7 +23,7 @@ data: documentationUrl: https://docs.airbyte.com/integrations/sources/shortio tags: - language:python - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-todoist/metadata.yaml b/airbyte-integrations/connectors/source-todoist/metadata.yaml index c6fd1c2b58ec..b578c8f07755 
100644 --- a/airbyte-integrations/connectors/source-todoist/metadata.yaml +++ b/airbyte-integrations/connectors/source-todoist/metadata.yaml @@ -26,5 +26,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/todoist tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-visma-economic/metadata.yaml b/airbyte-integrations/connectors/source-visma-economic/metadata.yaml index ec378e98d2b4..56e6d7330ce5 100644 --- a/airbyte-integrations/connectors/source-visma-economic/metadata.yaml +++ b/airbyte-integrations/connectors/source-visma-economic/metadata.yaml @@ -21,5 +21,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/visma-economic tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-wrike/metadata.yaml b/airbyte-integrations/connectors/source-wrike/metadata.yaml index 9e57391670c9..7a6309f77cb9 100644 --- a/airbyte-integrations/connectors/source-wrike/metadata.yaml +++ b/airbyte-integrations/connectors/source-wrike/metadata.yaml @@ -23,5 +23,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/wrike tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-younium/metadata.yaml b/airbyte-integrations/connectors/source-younium/metadata.yaml index 892a1ee423c0..c8d91423df43 100644 --- a/airbyte-integrations/connectors/source-younium/metadata.yaml +++ b/airbyte-integrations/connectors/source-younium/metadata.yaml @@ -23,5 +23,5 @@ data: supportLevel: community documentationUrl: https://docs.airbyte.com/integrations/sources/younium tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/source-zendesk-sell/metadata.yaml b/airbyte-integrations/connectors/source-zendesk-sell/metadata.yaml index 803cca0d539b..5f11fc4305bf 100644 --- a/airbyte-integrations/connectors/source-zendesk-sell/metadata.yaml +++ b/airbyte-integrations/connectors/source-zendesk-sell/metadata.yaml @@ -19,7 +19,7 @@ data: releaseStage: alpha documentationUrl: https://docs.airbyte.com/integrations/sources/zendesk-sell tags: - - language:lowcode + - language:low-code ab_internal: sl: 100 ql: 100 diff --git a/airbyte-integrations/connectors/source-zenefits/metadata.yaml b/airbyte-integrations/connectors/source-zenefits/metadata.yaml index 7392e0333eec..b5eec18af058 100644 --- a/airbyte-integrations/connectors/source-zenefits/metadata.yaml +++ b/airbyte-integrations/connectors/source-zenefits/metadata.yaml @@ -24,5 +24,5 @@ data: ql: 100 supportLevel: community tags: - - language:lowcode + - language:low-code metadataSpecVersion: "1.0" From be06855dc02bdb1ebbf5dfee716c156b824a017f Mon Sep 17 00:00:00 2001 From: Akash Kulkarni <113392464+akashkulk@users.noreply.github.com> Date: Wed, 17 Jan 2024 09:04:31 -0800 Subject: [PATCH 08/12] [Source-mongo] : Relax minimum document discovery size to 100 (#34314) --- .../source-mongodb-v2/integration_tests/expected_spec.json | 2 +- airbyte-integrations/connectors/source-mongodb-v2/metadata.yaml | 2 +- .../connectors/source-mongodb-v2/src/main/resources/spec.json | 2 +- docs/integrations/sources/mongodb-v2.md | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-mongodb-v2/integration_tests/expected_spec.json 
b/airbyte-integrations/connectors/source-mongodb-v2/integration_tests/expected_spec.json index 54e3d7aa189c..8de6fd3bc30e 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/integration_tests/expected_spec.json +++ b/airbyte-integrations/connectors/source-mongodb-v2/integration_tests/expected_spec.json @@ -164,7 +164,7 @@ "description": "The maximum number of documents to sample when attempting to discover the unique fields for a collection.", "default": 10000, "order": 10, - "minimum": 1000, + "minimum": 100, "maximum": 100000, "group": "advanced" } diff --git a/airbyte-integrations/connectors/source-mongodb-v2/metadata.yaml b/airbyte-integrations/connectors/source-mongodb-v2/metadata.yaml index fb5be28de49d..086852451f42 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/metadata.yaml +++ b/airbyte-integrations/connectors/source-mongodb-v2/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: database connectorType: source definitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e - dockerImageTag: 1.2.1 + dockerImageTag: 1.2.2 dockerRepository: airbyte/source-mongodb-v2 documentationUrl: https://docs.airbyte.com/integrations/sources/mongodb-v2 githubIssueLabel: source-mongodb-v2 diff --git a/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json b/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json index ae6b822110ca..4acd5c67d25f 100644 --- a/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json @@ -164,7 +164,7 @@ "description": "The maximum number of documents to sample when attempting to discover the unique fields for a collection.", "default": 10000, "order": 10, - "minimum": 1000, + "minimum": 100, "maximum": 100000, "group": "advanced" } diff --git a/docs/integrations/sources/mongodb-v2.md b/docs/integrations/sources/mongodb-v2.md index bc9ffb1820e2..c7821542174b 100644 --- a/docs/integrations/sources/mongodb-v2.md +++ b/docs/integrations/sources/mongodb-v2.md @@ -214,6 +214,7 @@ For more information regarding configuration parameters, please see [MongoDb Doc | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------------------------------------------------------| +| 1.2.2 | 2024-01-16 | [34314](https://github.com/airbytehq/airbyte/pull/34314) | Reduce minimum document discovery size to 100. | | 1.2.1 | 2023-12-18 | [33549](https://github.com/airbytehq/airbyte/pull/33549) | Add logging to understand op log size. | | 1.2.0 | 2023-12-18 | [33438](https://github.com/airbytehq/airbyte/pull/33438) | Remove LEGACY state flag | | 1.1.0 | 2023-12-14 | [32328](https://github.com/airbytehq/airbyte/pull/32328) | Schemaless mode in MongoDB.
| From 63c6961e78c6a5c77ccc70c588ba934dbd2fa885 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com> Date: Wed, 17 Jan 2024 19:15:09 +0200 Subject: [PATCH 09/12] =?UTF-8?q?=E2=9C=A8=20Source=20S3:=20Add=20IAM=20Ro?= =?UTF-8?q?le=20Authentication=20(#33818)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../source-s3/acceptance-test-config.yml | 18 ++++ .../source-s3/integration_tests/acceptance.py | 34 +++++++- .../integration_tests/cloud_spec.json | 13 +++ .../source-s3/integration_tests/spec.json | 13 +++ .../connectors/source-s3/metadata.yaml | 2 +- .../connectors/source-s3/setup.py | 8 +- .../connectors/source-s3/source_s3/source.py | 10 ++- .../source-s3/source_s3/v4/config.py | 9 +- .../source-s3/source_s3/v4/stream_reader.py | 69 ++++++++++++++-- .../unit_tests/v4/test_stream_reader.py | 31 +++++++ docs/integrations/sources/s3.md | 82 +++++++++++++++++-- 11 files changed, 263 insertions(+), 26 deletions(-) diff --git a/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml b/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml index 252460fff68d..4d8db46aba24 100644 --- a/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-s3/acceptance-test-config.yml @@ -6,6 +6,11 @@ acceptance_tests: path: integration_tests/expected_records/csv.jsonl exact_order: true timeout_seconds: 1800 + - config_path: secrets/config_iam_role.json + expect_records: + path: integration_tests/expected_records/csv.jsonl + exact_order: true + timeout_seconds: 1800 - config_path: secrets/v4_csv_custom_encoding_config.json expect_records: path: integration_tests/expected_records/legacy_csv_custom_encoding.jsonl @@ -110,6 +115,8 @@ acceptance_tests: tests: - config_path: secrets/config.json status: succeed + - config_path: secrets/config_iam_role.json + status: succeed - config_path: secrets/v4_csv_custom_encoding_config.json status: succeed - config_path: secrets/v4_csv_custom_format_config.json @@ -148,6 +155,9 @@ acceptance_tests: status: failed discovery: tests: + - config_path: secrets/config_iam_role.json + backward_compatibility_tests_config: + disable_for_version: "4.4.0" # new authentication added - IAM role - config_path: secrets/config.json backward_compatibility_tests_config: disable_for_version: "4.0.3" # removing the `streams.*.file_type` field which was redundant with `streams.*.format` @@ -201,6 +211,9 @@ acceptance_tests: - config_path: secrets/config.json configured_catalog_path: integration_tests/configured_catalogs/csv.json timeout_seconds: 1800 + - config_path: secrets/config_iam_role.json + configured_catalog_path: integration_tests/configured_catalogs/csv.json + timeout_seconds: 1800 - config_path: secrets/v4_parquet_config.json configured_catalog_path: integration_tests/configured_catalogs/parquet.json timeout_seconds: 1800 @@ -236,6 +249,11 @@ acceptance_tests: future_state: future_state_path: integration_tests/abnormal_state.json timeout_seconds: 1800 + - config_path: secrets/config_iam_role.json + configured_catalog_path: integration_tests/configured_catalogs/csv.json + future_state: + future_state_path: integration_tests/abnormal_state.json + timeout_seconds: 1800 - config_path: secrets/v4_parquet_config.json configured_catalog_path: integration_tests/configured_catalogs/parquet.json future_state: diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/acceptance.py 
b/airbyte-integrations/connectors/source-s3/integration_tests/acceptance.py index 6b0c294530cd..706e9eba88be 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/acceptance.py +++ b/airbyte-integrations/connectors/source-s3/integration_tests/acceptance.py @@ -3,14 +3,46 @@ # +import json +import logging +from pathlib import Path from typing import Iterable import pytest +import yaml pytest_plugins = ("connector_acceptance_test.plugin",) +logger = logging.getLogger("airbyte") @pytest.fixture(scope="session", autouse=True) def connector_setup() -> Iterable[None]: - """This fixture is a placeholder for external resources that acceptance test might require.""" + """This fixture is responsible for configuring AWS credentials that are used for assuming role during the IAM role based authentication.""" + config_file_path = "secrets/config_iam_role.json" + acceptance_test_config_file_path = "acceptance-test-config.yml" + + # Read environment variables from the JSON file + with open(config_file_path, "r") as file: + config = json.load(file) + + # Prepare environment variables to append to the YAML file + env_vars = { + "custom_environment_variables": { + "AWS_ASSUME_ROLE_EXTERNAL_ID": config["acceptance_test_aws_external_id"], + "AWS_ACCESS_KEY_ID": config["acceptance_test_aws_access_key_id"], + "AWS_SECRET_ACCESS_KEY": config["acceptance_test_aws_secret_access_key"], + } + } + + # Append environment variables to the YAML file + yaml_path = Path(acceptance_test_config_file_path) + if yaml_path.is_file(): + with open(acceptance_test_config_file_path, "r") as file: + existing_data = yaml.safe_load(file) or {} + existing_data.update(env_vars) + with open(acceptance_test_config_file_path, "w") as file: + yaml.safe_dump(existing_data, file) + else: + raise Exception(f"{acceptance_test_config_file_path} does not exist.") + yield diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json index b2593df8e5c9..ed084d3b08d3 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json @@ -358,6 +358,12 @@ "order": 2, "type": "string" }, + "role_arn": { + "title": "AWS Role ARN", + "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", + "order": 6, + "type": "string" + }, "aws_secret_access_key": { "title": "AWS Secret Access Key", "description": "In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.", @@ -610,6 +616,13 @@ "order": 2, "type": "string" }, + "role_arn": { + "title": "AWS Role ARN", + "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", + "always_show": true, + "order": 6, + "type": "string" + }, "path_prefix": { "title": "Path Prefix", "description": "By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. 
This is optional but recommended if your bucket contains many folders/files which you don't need to replicate.", diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json index dd3309240ada..76a48ffb09a5 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json @@ -365,6 +365,12 @@ "order": 3, "type": "string" }, + "role_arn": { + "title": "AWS Role ARN", + "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", + "order": 6, + "type": "string" + }, "endpoint": { "title": "Endpoint", "description": "Endpoint to an S3 compatible service. Leave empty to use AWS.", @@ -609,6 +615,13 @@ "order": 2, "type": "string" }, + "role_arn": { + "title": "AWS Role ARN", + "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", + "always_show": true, + "order": 6, + "type": "string" + }, "path_prefix": { "title": "Path Prefix", "description": "By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimize finding these in S3. This is optional but recommended if your bucket contains many folders/files which you don't need to replicate.", diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml index 8f13506605fd..d660125e526b 100644 --- a/airbyte-integrations/connectors/source-s3/metadata.yaml +++ b/airbyte-integrations/connectors/source-s3/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: file connectorType: source definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 - dockerImageTag: 4.3.1 + dockerImageTag: 4.4.0 dockerRepository: airbyte/source-s3 documentationUrl: https://docs.airbyte.com/integrations/sources/s3 githubIssueLabel: source-s3 diff --git a/airbyte-integrations/connectors/source-s3/setup.py b/airbyte-integrations/connectors/source-s3/setup.py index e2d0000f2949..cd2c48e2924d 100644 --- a/airbyte-integrations/connectors/source-s3/setup.py +++ b/airbyte-integrations/connectors/source-s3/setup.py @@ -14,13 +14,7 @@ "python-snappy==0.6.1", ] -TEST_REQUIREMENTS = [ - "requests-mock~=1.9.3", - "pytest-mock~=3.6.1", - "pytest~=6.1", - "pandas==2.0.3", - "docker", -] +TEST_REQUIREMENTS = ["requests-mock~=1.9.3", "pytest-mock~=3.6.1", "pytest~=6.1", "pandas==2.0.3", "docker", "moto"] setup( name="source_s3", diff --git a/airbyte-integrations/connectors/source-s3/source_s3/source.py b/airbyte-integrations/connectors/source-s3/source_s3/source.py index 8621fe4bbb5a..224f7b036e4a 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/source.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/source.py @@ -1,8 +1,6 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# - - from typing import Optional from pydantic import BaseModel, Field @@ -39,6 +37,14 @@ class Config: always_show=True, order=2, ) + role_arn: Optional[str] = Field( + title=f"AWS Role ARN", + default=None, + description="Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations " + f"requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", + always_show=True, + order=6, + ) path_prefix: str = Field( default="", description="By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, " diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py index 6275c0954388..55c3b5708f59 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py @@ -1,7 +1,6 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - from typing import Any, Dict, Optional import dpath.util @@ -31,6 +30,14 @@ def documentation_url(cls) -> AnyUrl: order=2, ) + role_arn: Optional[str] = Field( + title=f"AWS Role ARN", + default=None, + description="Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations " + f"requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.", + order=6, + ) + aws_secret_access_key: Optional[str] = Field( title="AWS Secret Access Key", default=None, diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py index d8bfbd5b16bc..0457dba4ee36 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py @@ -5,6 +5,7 @@ import logging from datetime import datetime from io import IOBase +from os import getenv from typing import Iterable, List, Optional, Set import boto3.session @@ -16,10 +17,14 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile from botocore.client import BaseClient from botocore.client import Config as ClientConfig +from botocore.credentials import RefreshableCredentials from botocore.exceptions import ClientError +from botocore.session import get_session from source_s3.v4.config import Config from source_s3.v4.zip_reader import DecompressedStream, RemoteFileInsideArchive, ZipContentReader, ZipFileHandler +AWS_EXTERNAL_ID = getenv("AWS_ASSUME_ROLE_EXTERNAL_ID") + class SourceS3StreamReader(AbstractFileBasedStreamReader): def __init__(self): @@ -52,14 +57,66 @@ def s3_client(self) -> BaseClient: raise ValueError("Source config is missing; cannot create the S3 client.") if self._s3_client is None: client_kv_args = _get_s3_compatible_client_args(self.config) if self.config.endpoint else {} - self._s3_client = boto3.client( - "s3", - aws_access_key_id=self.config.aws_access_key_id, - aws_secret_access_key=self.config.aws_secret_access_key, - **client_kv_args, - ) + + if self.config.role_arn: + self._s3_client = self._get_iam_s3_client(client_kv_args) + else: + self._s3_client = boto3.client( + "s3", + aws_access_key_id=self.config.aws_access_key_id, + aws_secret_access_key=self.config.aws_secret_access_key, + **client_kv_args, + ) + return self._s3_client + def _get_iam_s3_client(self, client_kv_args: dict) -> BaseClient: + """ + Creates an 
S3 client using AWS Security Token Service (STS) with assumed role credentials. This method handles + the authentication process by assuming an IAM role, optionally using an external ID for enhanced security. + The obtained credentials are set to auto-refresh upon expiration, ensuring uninterrupted access to the S3 service. + + :param client_kv_args: A dictionary of key-value pairs for the boto3 S3 client constructor. + :return: An instance of a boto3 S3 client with the assumed role credentials. + + The method assumes a role specified in the `self.config.role_arn` and creates a session with the S3 service. + If `AWS_ASSUME_ROLE_EXTERNAL_ID` environment variable is set, it will be used during the role assumption for additional security. + """ + + def refresh(): + client = boto3.client("sts") + if AWS_EXTERNAL_ID: + role = client.assume_role( + RoleArn=self.config.role_arn, + RoleSessionName="airbyte-source-s3", + ExternalId=AWS_EXTERNAL_ID, + ) + else: + role = client.assume_role( + RoleArn=self.config.role_arn, + RoleSessionName="airbyte-source-s3", + ) + + creds = role.get("Credentials", {}) + return { + "access_key": creds["AccessKeyId"], + "secret_key": creds["SecretAccessKey"], + "token": creds["SessionToken"], + "expiry_time": creds["Expiration"].isoformat(), + } + + session_credentials = RefreshableCredentials.create_from_metadata( + metadata=refresh(), + refresh_using=refresh, + method="sts-assume-role", + ) + + session = get_session() + session._credentials = session_credentials + autorefresh_session = boto3.Session(botocore_session=session) + + return autorefresh_session.client("s3", **client_kv_args) + def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]: """ Get all files matching the specified glob patterns. 
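The `_get_iam_s3_client` hunk above wires an STS assume-role call into a botocore session through `RefreshableCredentials`. As a reading aid (not part of the patch), here is a minimal standalone sketch of the same pattern; the role ARN and session name are hypothetical placeholders, and it assumes the environment already holds base AWS credentials that are allowed to call `sts:AssumeRole`:

```python
# Illustrative sketch of the auto-refreshing assume-role pattern used above.
# The role ARN below is a placeholder, not a real resource.
from os import getenv

import boto3
from botocore.credentials import RefreshableCredentials
from botocore.session import get_session

ROLE_ARN = "arn:aws:iam::123456789012:role/example-airbyte-role"  # hypothetical


def refresh() -> dict:
    """Assume the role and return credentials in the shape botocore expects."""
    sts = boto3.client("sts")  # authenticated with the base credentials
    kwargs = {"RoleArn": ROLE_ARN, "RoleSessionName": "airbyte-source-s3"}
    external_id = getenv("AWS_ASSUME_ROLE_EXTERNAL_ID")
    if external_id:  # optional shared secret, as in the connector code
        kwargs["ExternalId"] = external_id
    creds = sts.assume_role(**kwargs)["Credentials"]
    return {
        "access_key": creds["AccessKeyId"],
        "secret_key": creds["SecretAccessKey"],
        "token": creds["SessionToken"],
        # botocore calls refresh() again shortly before this timestamp
        "expiry_time": creds["Expiration"].isoformat(),
    }


credentials = RefreshableCredentials.create_from_metadata(
    metadata=refresh(), refresh_using=refresh, method="sts-assume-role"
)
botocore_session = get_session()
botocore_session._credentials = credentials  # same private-attribute wiring as the patch
s3 = boto3.Session(botocore_session=botocore_session).client("s3")
print(s3.list_buckets()["Buckets"])
```

The design point is that temporary STS credentials expire (by default after about an hour), so the connector hands botocore a refresh callback instead of static keys; long-running syncs then survive credential expiry without re-creating the S3 client.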
diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py index 05d7f7873be1..b1bede862d22 100644 --- a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py @@ -16,6 +16,7 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import FileReadMode from airbyte_cdk.sources.file_based.remote_file import RemoteFile from botocore.stub import Stubber +from moto import mock_sts from pydantic import AnyUrl from source_s3.v4.config import Config from source_s3.v4.stream_reader import SourceS3StreamReader @@ -238,3 +239,33 @@ def set_stub(reader: SourceS3StreamReader, contents: List[Dict[str, Any]], multi ) s3_stub.activate() return s3_stub + + +@mock_sts +@patch("source_s3.v4.stream_reader.boto3.client") +def test_get_iam_s3_client(boto3_client_mock): + # Mock the STS client assume_role method + boto3_client_mock.return_value.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "assumed_access_key_id", + "SecretAccessKey": "assumed_secret_access_key", + "SessionToken": "assumed_session_token", + "Expiration": datetime.now(), + } + } + + # Instantiate your stream reader and set the config + reader = SourceS3StreamReader() + reader.config = Config( + bucket="test", + role_arn="arn:aws:iam::123456789012:role/my-role", + streams=[], + endpoint=None, + ) + + # Call _get_iam_s3_client + with Stubber(reader.s3_client): + s3_client = reader._get_iam_s3_client({}) + + # Assertions to validate the s3 client + assert s3_client is not None diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index 36e5eb5fef31..2cea82eff718 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -15,7 +15,9 @@ Please note that using cloud storage may incur egress costs. Egress refers to da ### Step 1: Set up Amazon S3 -**If you are syncing from a private bucket**, you will need to provide both an `AWS Access Key ID` and `AWS Secret Access Key` to authenticate the connection. The IAM user associated with the credentials must be granted `read` and `list` permissions for the bucket and its objects. If you are unfamiliar with configuring AWS permissions, you can follow these steps to obtain the necessary permissions and credentials: +**If you are syncing from a private bucket**, you need to authenticate the connection. This can be done either by using an `IAM User` (with `AWS Access Key ID` and `Secret Access Key`) or an `IAM Role` (with `Role ARN`). Begin by creating a policy with the necessary permissions: + +#### Create a Policy 1. Log in to your Amazon AWS account and open the [IAM console](https://console.aws.amazon.com/iam/home#home). 2. In the IAM dashboard, select **Policies**, then click **Create Policy**. @@ -45,10 +47,69 @@ At this time, object-level permissions alone are not sufficient to successfully ::: 4. Give your policy a descriptive name, then click **Create policy**. -5. In the IAM dashboard, click **Users**. Select an existing IAM user or create a new one by clicking **Add users**. -6. If you are using an _existing_ IAM user, click the **Add permissions** dropdown menu and select **Add permissions**. If you are creating a _new_ user, you will be taken to the Permissions screen after selecting a name. -7. Select **Attach policies directly**, then find and check the box for your new policy. 
Click **Next**, then **Add permissions**.
-8. After successfully creating your user, select the **Security credentials** tab and click **Create access key**. You will be prompted to select a use case and add optional tags to your access key. Click **Create access key** to generate the keys.
+
+#### Option 1: Using an IAM Role (Most secure)
+
+:::note
+This authentication method is currently in the testing phase. To enable it for your workspace, please contact our Support Team.
+:::
+
+1. In the IAM dashboard, click **Roles**, then **Create role**.
+2. Choose the **AWS account** trusted entity type and attach the policy you created.
+3. Set up a trust relationship for the role so that the AWS principal running Airbyte is allowed to assume it. As part of the trust relationship, set an **External ID**: a secret that both the trusting service (Airbyte) and the trusted role know, used to prevent the "confused deputy" problem. On a self-managed instance, trust the AWS account or user your deployment runs under, and pass the external ID to the connector with `export AWS_ASSUME_ROLE_EXTERNAL_ID="{your-external-id}"`. Edit the trust relationship policy to reflect this:
+```
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Principal": {
+        "AWS": "arn:aws:iam::{your-aws-account-id}:user/{your-username}"
+      },
+      "Action": "sts:AssumeRole",
+      "Condition": {
+        "StringEquals": {
+          "sts:ExternalId": "{your-external-id}"
+        }
+      }
+    }
+  ]
+}
+```
+On Airbyte Cloud, the trusted principal is Airbyte's delegated access user, and the External ID should be your Airbyte workspace ID, which can be found in the URL of your workspace page. Edit the trust relationship policy to include it:
+```
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Principal": {
+        "AWS": "arn:aws:iam::094410056844:user/delegated_access_user"
+      },
+      "Action": "sts:AssumeRole",
+      "Condition": {
+        "StringEquals": {
+          "sts:ExternalId": "{your-airbyte-workspace-id}"
+        }
+      }
+    }
+  ]
+}
+```
+
+4. Complete the role creation and note the Role ARN.
+
+#### Option 2: Using an IAM User
+
+1. In the IAM dashboard, click **Users**. Select an existing IAM user or create a new one by clicking **Add users**.
+2. If you are using an _existing_ IAM user, click the **Add permissions** dropdown menu and select **Add permissions**. If you are creating a _new_ user, you will be taken to the Permissions screen after selecting a name.
+3. Select **Attach policies directly**, then find and check the box for your new policy. Click **Next**, then **Add permissions**.
+4. After successfully creating your user, select the **Security credentials** tab and click **Create access key**. You will be prompted to select a use case and add optional tags to your access key. Click **Create access key** to generate the keys.

:::caution
Your `Secret Access Key` will only be visible once upon creation. Be sure to copy and store it securely for future use.
:::

For more information on managing your access keys, please refer to the

3. Give a **Name** to the stream
4. (Optional) - If you want to enforce a specific schema, you can enter an **Input schema**. By default, this value is set to `{}` and will automatically infer the schema from the file\(s\) you are replicating. For details on providing a custom schema, refer to the [User Schema section](#user-schema).
5. Optionally, enter the **Globs** which dictate which files will be synced. This is a pattern that allows Airbyte to match the specific files to replicate. If you are replicating all the files within your bucket, use `**` as the pattern. For more precise pattern matching options, refer to the [Path Patterns section](#path-patterns) below.
-6. **If you are syncing from a private bucket**, you must fill the **AWS Access Key ID** and **AWS Secret Access Key** fields with the appropriate credentials to authenticate the connection. All other fields are optional and can be left empty. Refer to the [S3 Provider Settings section](#s3-provider-settings) below for more information on each field.
+6. **To authenticate your private bucket**:
+   - If using an IAM role, enter the **AWS Role ARN**.
+   - If using IAM user credentials, fill the **AWS Access Key ID** and **AWS Secret Access Key** fields with the appropriate credentials.
+
+All other fields are optional and can be left empty. Refer to the [S3 Provider Settings section](#s3-provider-settings) below for more information on each field.

## Supported sync modes

@@ -256,8 +321,9 @@ To perform the text extraction from PDF and Docx files, the connector uses the [

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:----------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------|
-| 4.3.1 | 2024-01-04 | [33937](https://github.com/airbytehq/airbyte/pull/33937) | Prepare for airbyte-lib |
-| 4.3.0 | 2023-12-14 | [33411](https://github.com/airbytehq/airbyte/pull/33411) | Bump CDK version to auto-set primary key for document file streams and support raw txt files |
+| 4.4.0 | 2024-01-12 | [33818](https://github.com/airbytehq/airbyte/pull/33818) | Add IAM Role Authentication |
+| 4.3.1 | 2024-01-04 | [33937](https://github.com/airbytehq/airbyte/pull/33937) | Prepare for airbyte-lib |
+| 4.3.0 | 2023-12-14 | [33411](https://github.com/airbytehq/airbyte/pull/33411) | Bump CDK version to auto-set primary key for document file streams and support raw txt files |
| 4.2.4 | 2023-12-06 | [33187](https://github.com/airbytehq/airbyte/pull/33187) | Bump CDK version to hide source-defined primary key |
| 4.2.3 | 2023-11-16 | [32608](https://github.com/airbytehq/airbyte/pull/32608) | Improve document file type parser |
| 4.2.2 | 2023-11-20 | [32677](https://github.com/airbytehq/airbyte/pull/32677) | Only read files with ".zip" extension as zipped files |

From 006338257c96df99b3138b0fa85640637f314577 Mon Sep 17 00:00:00 2001
From: Edward Gao
Date: Wed, 17 Jan 2024 10:32:21 -0800
Subject: [PATCH 10/12] Destination postgres: DV2 beta implementation (#34177)

---
 airbyte-cdk/java/airbyte-cdk/README.md | 1 +
 .../cdk/db/factory/DataSourceFactory.java | 76 ++---
 .../io/airbyte/cdk/db/jdbc/JdbcDatabase.java | 41 +--
 .../src/main/resources/version.properties | 2 +-
 .../jdbc/AbstractJdbcDestination.java | 19 +-
 .../destination/jdbc/JdbcSqlOperations.java | 26 +-
 .../JdbcDestinationHandler.java | 34 ++-
 .../typing_deduping/JdbcSqlGenerator.java | 38 ++-
 .../JdbcSqlGeneratorIntegrationTest.java | 4 +-
 .../JdbcTypingDedupingTest.java | 113 +++++++
 .../typing_deduping/DefaultTyperDeduper.java | 1 +
 .../BaseSqlGeneratorIntegrationTest.java | 21 ++
 .../alltypes_unsafe_inputrecords.jsonl | 3 +
 .../build.gradle | 2 +-
 .../gradle.properties | 4 +-
 .../metadata.yaml | 2 +-
.../destination-postgres/build.gradle | 2 +- .../destination-postgres/gradle.properties | 4 +- .../destination-postgres/metadata.yaml | 2 +- .../postgres/PostgresDestination.java | 28 +- .../postgres/PostgresSqlOperations.java | 8 +- .../typing_deduping/PostgresSqlGenerator.java | 279 ++++++++++++++++++ ...PostgresRawOverrideTypingDedupingTest.java | 22 ++ .../PostgresSqlGeneratorIntegrationTest.java | 153 ++++++++++ .../PostgresTypingDedupingTest.java | 75 +++++ ...orchange_expectedrecords_dedup_final.jsonl | 3 + ...rsorchange_expectedrecords_dedup_raw.jsonl | 4 + .../sync1_expectedrecords_dedup_final.jsonl | 4 + .../sync1_expectedrecords_dedup_final2.jsonl | 1 + ...sync1_expectedrecords_nondedup_final.jsonl | 5 + .../dat/sync1_expectedrecords_raw.jsonl | 5 + .../dat/sync1_expectedrecords_raw2.jsonl | 1 + ...ectedrecords_incremental_dedup_final.jsonl | 3 + ...xpectedrecords_incremental_dedup_raw.jsonl | 7 + ...ctedrecords_fullrefresh_append_final.jsonl | 8 + ...drecords_fullrefresh_overwrite_final.jsonl | 3 + ...tedrecords_fullrefresh_overwrite_raw.jsonl | 3 + ...ectedrecords_incremental_dedup_final.jsonl | 3 + ...ctedrecords_incremental_dedup_final2.jsonl | 1 + .../dat/sync2_expectedrecords_raw.jsonl | 9 + .../dat/sync2_expectedrecords_raw2.jsonl | 2 + .../alltypes_expectedrecords_final.jsonl | 8 + .../alltypes_expectedrecords_raw.jsonl | 6 + ...crementaldedup_expectedrecords_final.jsonl | 2 + ...incrementaldedup_expectedrecords_raw.jsonl | 3 + ...ypes_in_string_expectedrecords_final.jsonl | 5 + ..._types_in_string_expectedrecords_raw.jsonl | 5 + .../nocolumns_expectedrecords_final.jsonl | 1 + .../nocolumns_expectedrecords_raw.jsonl | 1 + ...servedkeywords_expectedrecords_final.jsonl | 1 + ...mestampformats_expectedrecords_final.jsonl | 16 + ...irdcolumnnames_expectedrecords_final.jsonl | 9 + ...weirdcolumnnames_expectedrecords_raw.jsonl | 1 + docs/integrations/destinations/postgres.md | 3 +- 54 files changed, 972 insertions(+), 111 deletions(-) create mode 100644 airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.java create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGenerator.java create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresRawOverrideTypingDedupingTest.java create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGeneratorIntegrationTest.java create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresTypingDedupingTest.java create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl create mode 100644 
airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl create mode 100644 
airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl diff --git a/airbyte-cdk/java/airbyte-cdk/README.md b/airbyte-cdk/java/airbyte-cdk/README.md index d572437df71c..47539a8cc70f 100644 --- a/airbyte-cdk/java/airbyte-cdk/README.md +++ b/airbyte-cdk/java/airbyte-cdk/README.md @@ -166,6 +166,7 @@ MavenLocal debugging steps: | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.13.0 | 2024-01-16 | [\#34177](https://github.com/airbytehq/airbyte/pull/34177) | Add `useExpensiveSafeCasting` param in JdbcSqlGenerator methods; add JdbcTypingDedupingTest fixture; other DV2-related changes | | 0.12.1 | 2024-01-11 | [\#34186](https://github.com/airbytehq/airbyte/pull/34186) | Add hook for additional destination specific checks to JDBC destination check method | | 0.12.0 | 2024-01-10 | [\#33875](https://github.com/airbytehq/airbyte/pull/33875) | Upgrade sshd-mina to 2.11.1 | | 0.11.5 | 2024-01-10 | [\#34119](https://github.com/airbytehq/airbyte/pull/34119) | Remove wal2json support for postgres+debezium. | diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java index 99da3aa21fe1..a4324a30ebf7 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java @@ -32,11 +32,7 @@ public static DataSource create(final String username, final String password, final String driverClassName, final String jdbcConnectionString) { - return new DataSourceBuilder() - .withDriverClassName(driverClassName) - .withJdbcUrl(jdbcConnectionString) - .withPassword(password) - .withUsername(username) + return new DataSourceBuilder(username, password, driverClassName, jdbcConnectionString) .build(); } @@ -56,12 +52,8 @@ public static DataSource create(final String username, final String jdbcConnectionString, final Map connectionProperties, final Duration connectionTimeout) { - return new DataSourceBuilder() + return new DataSourceBuilder(username, password, driverClassName, jdbcConnectionString) .withConnectionProperties(connectionProperties) - .withDriverClassName(driverClassName) - .withJdbcUrl(jdbcConnectionString) - .withPassword(password) - .withUsername(username) .withConnectionTimeout(connectionTimeout) .build(); } @@ -83,13 +75,7 @@ public static DataSource create(final String username, final int port, final String database, final String driverClassName) { - return new DataSourceBuilder() - .withDatabase(database) - .withDriverClassName(driverClassName) - .withHost(host) - .withPort(port) - .withPassword(password) - .withUsername(username) + return new DataSourceBuilder(username, password, driverClassName, host, port, database) .build(); } @@ -112,14 +98,8 @@ public static DataSource 
create(final String username, final String database, final String driverClassName, final Map connectionProperties) { - return new DataSourceBuilder() + return new DataSourceBuilder(username, password, driverClassName, host, port, database) .withConnectionProperties(connectionProperties) - .withDatabase(database) - .withDriverClassName(driverClassName) - .withHost(host) - .withPort(port) - .withPassword(password) - .withUsername(username) .build(); } @@ -139,13 +119,7 @@ public static DataSource createPostgres(final String username, final String host, final int port, final String database) { - return new DataSourceBuilder() - .withDatabase(database) - .withDriverClassName("org.postgresql.Driver") - .withHost(host) - .withPort(port) - .withPassword(password) - .withUsername(username) + return new DataSourceBuilder(username, password, "org.postgresql.Driver", host, port, database) .build(); } @@ -158,7 +132,7 @@ public static DataSource createPostgres(final String username, */ public static void close(final DataSource dataSource) throws Exception { if (dataSource != null) { - if (dataSource instanceof AutoCloseable closeable) { + if (dataSource instanceof final AutoCloseable closeable) { closeable.close(); } } @@ -167,7 +141,7 @@ public static void close(final DataSource dataSource) throws Exception { /** * Builder class used to configure and construct {@link DataSource} instances. */ - private static class DataSourceBuilder { + public static class DataSourceBuilder { private Map connectionProperties = Map.of(); private String database; @@ -180,8 +154,35 @@ private static class DataSourceBuilder { private String password; private int port = 5432; private String username; + private String connectionInitSql; - private DataSourceBuilder() {} + private DataSourceBuilder(final String username, + final String password, + final String driverClassName) { + this.username = username; + this.password = password; + this.driverClassName = driverClassName; + } + + public DataSourceBuilder(final String username, + final String password, + final String driverClassName, + final String jdbcUrl) { + this(username, password, driverClassName); + this.jdbcUrl = jdbcUrl; + } + + public DataSourceBuilder(final String username, + final String password, + final String driverClassName, + final String host, + final int port, + final String database) { + this(username, password, driverClassName); + this.host = host; + this.port = port; + this.database = database; + } public DataSourceBuilder withConnectionProperties(final Map connectionProperties) { if (connectionProperties != null) { @@ -248,6 +249,11 @@ public DataSourceBuilder withUsername(final String username) { return this; } + public DataSourceBuilder withConnectionInitSql(final String sql) { + this.connectionInitSql = sql; + return this; + } + public DataSource build() { final DatabaseDriver databaseDriver = DatabaseDriver.findByDriverClassName(driverClassName); @@ -272,6 +278,8 @@ public DataSource build() { */ config.setInitializationFailTimeout(Integer.MIN_VALUE); + config.setConnectionInitSql(connectionInitSql); + connectionProperties.forEach(config::addDataSourceProperty); return new HikariDataSource(config); diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/jdbc/JdbcDatabase.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/jdbc/JdbcDatabase.java index ff7db2e6a5ff..8557aaecece6 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/jdbc/JdbcDatabase.java +++ 
b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/jdbc/JdbcDatabase.java @@ -174,15 +174,17 @@ public List queryJsons(final CheckedFunction stream = unsafeQuery(c -> { - PreparedStatement statement = c.prepareStatement(sql); - int i = 1; - for (String param : params) { - statement.setString(i, param); - ++i; - } - return statement; - }, rs -> rs.getInt(1))) { + try (final Stream stream = unsafeQuery( + c -> getPreparedStatement(sql, params, c), + rs -> rs.getInt(1))) { + return stream.findFirst().get(); + } + } + + public boolean queryBoolean(final String sql, final String... params) throws SQLException { + try (final Stream stream = unsafeQuery( + c -> getPreparedStatement(sql, params, c), + rs -> rs.getBoolean(1))) { return stream.findFirst().get(); } } @@ -216,15 +218,8 @@ public List queryJsons(final String sql, final String... params) throw } public ResultSetMetaData queryMetadata(final String sql, final String... params) throws SQLException { - try (final Stream q = unsafeQuery(c -> { - PreparedStatement statement = c.prepareStatement(sql); - int i = 1; - for (String param : params) { - statement.setString(i, param); - ++i; - } - return statement; - }, + try (final Stream q = unsafeQuery( + c -> getPreparedStatement(sql, params, c), ResultSet::getMetaData)) { return q.findFirst().orElse(null); } @@ -232,4 +227,14 @@ public ResultSetMetaData queryMetadata(final String sql, final String... params) public abstract DatabaseMetaData getMetaData() throws SQLException; + private static PreparedStatement getPreparedStatement(String sql, String[] params, Connection c) throws SQLException { + PreparedStatement statement = c.prepareStatement(sql); + int i = 1; + for (String param : params) { + statement.setString(i, param); + i++; + } + return statement; + } + } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties index db02062e2991..f6cee2374148 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties @@ -1 +1 @@ -version=0.12.1 +version=0.13.0 diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java index ff93320b19bd..d25b6ecb4296 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/AbstractJdbcDestination.java @@ -200,17 +200,26 @@ private static PartialAirbyteMessage getDummyRecord() { .withSerialized(dummyDataToInsert.toString()); } + /** + * Subclasses which need to modify the DataSource should override + * {@link #modifyDataSourceBuilder(DataSourceFactory.DataSourceBuilder)} rather than this method. + */ @VisibleForTesting public DataSource getDataSource(final JsonNode config) { final JsonNode jdbcConfig = toJdbcConfig(config); final Map connectionProperties = getConnectionProperties(config); - return DataSourceFactory.create( + final DataSourceFactory.DataSourceBuilder builder = new DataSourceFactory.DataSourceBuilder( jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? 
jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, driverClassName, - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), - connectionProperties, - getConnectionTimeout(connectionProperties)); + jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText()) + .withConnectionProperties(connectionProperties) + .withConnectionTimeout(getConnectionTimeout(connectionProperties)); + return modifyDataSourceBuilder(builder).build(); + } + + protected DataSourceFactory.DataSourceBuilder modifyDataSourceBuilder(final DataSourceFactory.DataSourceBuilder builder) { + return builder; } @VisibleForTesting @@ -287,7 +296,7 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN final var migrator = new JdbcV1V2Migrator(namingResolver, database, databaseName); final NoopV2TableMigrator v2TableMigrator = new NoopV2TableMigrator(); final DestinationHandler destinationHandler = getDestinationHandler(databaseName, database); - boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); + final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); final TyperDeduper typerDeduper; if (disableTypeDedupe) { typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, destinationHandler, parsedCatalog, migrator, v2TableMigrator, diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java index a7db620058fc..7ce3a8a7a01c 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/JdbcSqlOperations.java @@ -87,22 +87,26 @@ public String createTableQuery(final JdbcDatabase database, final String schemaN protected String createTableQueryV1(final String schemaName, final String tableName) { return String.format( - "CREATE TABLE IF NOT EXISTS %s.%s ( \n" - + "%s VARCHAR PRIMARY KEY,\n" - + "%s JSONB,\n" - + "%s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP\n" - + ");\n", + """ + CREATE TABLE IF NOT EXISTS %s.%s ( + %s VARCHAR PRIMARY KEY, + %s JSONB, + %s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP + ); + """, schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); } protected String createTableQueryV2(final String schemaName, final String tableName) { return String.format( - "CREATE TABLE IF NOT EXISTS %s.%s ( \n" - + "%s VARCHAR PRIMARY KEY,\n" - + "%s JSONB,\n" - + "%s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP\n" - + "%s TIMESTAMP WITH TIME ZONE DEFAULT NULL\n" - + ");\n", + """ + CREATE TABLE IF NOT EXISTS %s.%s ( + %s VARCHAR PRIMARY KEY, + %s JSONB, + %s TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + %s TIMESTAMP WITH TIME ZONE DEFAULT NULL + ); + """, schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_RAW_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT, JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT); } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java 
b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java index 46acc8b3d025..3981c07c5ad2 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.java @@ -4,6 +4,12 @@ package io.airbyte.cdk.integrations.destination.jdbc.typing_deduping; +import static org.jooq.impl.DSL.exists; +import static org.jooq.impl.DSL.field; +import static org.jooq.impl.DSL.name; +import static org.jooq.impl.DSL.select; +import static org.jooq.impl.DSL.selectOne; + import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.integrations.destination.jdbc.ColumnDefinition; import io.airbyte.cdk.integrations.destination.jdbc.CustomSqlType; @@ -26,6 +32,7 @@ import java.util.UUID; import java.util.stream.Stream; import lombok.extern.slf4j.Slf4j; +import org.jooq.conf.ParamType; import org.jooq.impl.DSL; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,18 +58,13 @@ public Optional findExistingTable(final StreamId id) throws Exc @Override public boolean isFinalTableEmpty(final StreamId id) throws Exception { - final int rowCount = jdbcDatabase.queryInt( - """ - SELECT row_count - FROM information_schema.tables - WHERE table_catalog = ? - AND table_schema = ? - AND table_name = ? - """, - databaseName, - id.finalNamespace(), - id.finalName()); - return rowCount == 0; + return !jdbcDatabase.queryBoolean( + select( + field(exists( + selectOne() + .from(name(id.finalNamespace(), id.finalName())) + .limit(1)))) + .getSQL(ParamType.INLINED)); } @Override @@ -83,8 +85,8 @@ public InitialRawTableState getInitialRawTableState(final StreamId id) throws Ex // but it's also the only method in the JdbcDatabase interface to return non-string/int types try (final Stream timestampStream = jdbcDatabase.unsafeQuery( conn -> conn.prepareStatement( - DSL.select(DSL.field("MIN(_airbyte_extracted_at)").as("min_timestamp")) - .from(DSL.name(id.rawNamespace(), id.rawName())) + select(field("MIN(_airbyte_extracted_at)").as("min_timestamp")) + .from(name(id.rawNamespace(), id.rawName())) .where(DSL.condition("_airbyte_loaded_at IS NULL")) .getSQL()), record -> record.getTimestamp("min_timestamp"))) { @@ -102,8 +104,8 @@ record -> record.getTimestamp("min_timestamp"))) { // This second query just finds the newest raw record. try (final Stream timestampStream = jdbcDatabase.unsafeQuery( conn -> conn.prepareStatement( - DSL.select(DSL.field("MAX(_airbyte_extracted_at)").as("min_timestamp")) - .from(DSL.name(id.rawNamespace(), id.rawName())) + select(field("MAX(_airbyte_extracted_at)").as("min_timestamp")) + .from(name(id.rawNamespace(), id.rawName())) .getSQL()), record -> record.getTimestamp("min_timestamp"))) { // Filter for nonNull values in case the query returned NULL (i.e. no raw records at all). 
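The emptiness check above now pushes the work into the database: rather than consulting catalog metadata, it issues an EXISTS subquery that can stop at the first matching row. A minimal sketch of how the jOOQ expression renders and is consumed, assuming a hypothetical final table "public"."users" (the table name is illustrative, not from this patch):

    // Renders roughly to: SELECT EXISTS (SELECT 1 FROM "public"."users" LIMIT 1)
    final String sql = select(field(exists(
        selectOne().from(name("public", "users")).limit(1))))
        .getSQL(ParamType.INLINED);
    // queryBoolean reads the single boolean column; isFinalTableEmpty negates the result.
    final boolean hasRows = jdbcDatabase.queryBoolean(sql);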
diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java index a66f6ac5594e..0feb56ca9a9e 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java @@ -44,10 +44,12 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import java.sql.Timestamp; import java.time.Instant; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.stream.Stream; import org.jooq.CommonTableExpression; import org.jooq.Condition; import org.jooq.CreateSchemaFinalStep; @@ -259,11 +261,15 @@ public Sql createTable(final StreamConfig stream, final String suffix, final boo // TODO: Use Naming transformer to sanitize these strings with redshift restrictions. final String finalTableIdentifier = stream.id().finalName() + suffix.toLowerCase(); if (!force) { - return Sql.of(createTableSql(stream.id().finalNamespace(), finalTableIdentifier, stream.columns())); + return transactionally(Stream.concat( + Stream.of(createTableSql(stream.id().finalNamespace(), finalTableIdentifier, stream.columns())), + createIndexSql(stream, suffix).stream()).toList()); } - return transactionally( - dropTableIfExists(quotedName(stream.id().finalNamespace(), finalTableIdentifier)).getSQL(ParamType.INLINED), - createTableSql(stream.id().finalNamespace(), finalTableIdentifier, stream.columns())); + return transactionally(Stream.concat( + Stream.of( + dropTableIfExists(quotedName(stream.id().finalNamespace(), finalTableIdentifier)).getSQL(ParamType.INLINED), + createTableSql(stream.id().finalNamespace(), finalTableIdentifier, stream.columns())), + createIndexSql(stream, suffix).stream()).toList()); } @Override @@ -419,10 +425,18 @@ protected String createTableSql(final String namespace, final String tableName, final DSLContext dsl = getDslContext(); final CreateTableColumnStep createTableSql = dsl .createTable(quotedName(namespace, tableName)) - .columns(buildFinalTableFields(columns, getFinalTableMetaColumns(true)));; + .columns(buildFinalTableFields(columns, getFinalTableMetaColumns(true))); return createTableSql.getSQL(); } + /** + * Subclasses may override this method to add additional indexes after their CREATE TABLE statement. + * This is useful if the destination's CREATE TABLE statement does not accept an index definition. 
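+ * For example, a generator may return a CREATE INDEX statement here; the Postgres generator in
+ * this patch does exactly that over the primary key of dedup streams.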
+ */ + protected List createIndexSql(final StreamConfig stream, final String suffix) { + return Collections.emptyList(); + } + protected String beginTransaction() { return "BEGIN"; } @@ -471,22 +485,26 @@ private String checkpointRawTable(final String schemaName, final String tableNam .getSQL(ParamType.INLINED); } - protected Field castedField(final Field field, final AirbyteType type, final String alias) { + protected Field castedField( + final Field field, + final AirbyteType type, + final String alias, + final boolean useExpensiveSaferCasting) { if (type instanceof final AirbyteProtocolType airbyteProtocolType) { - return castedField(field, airbyteProtocolType).as(quotedName(alias)); - + return castedField(field, airbyteProtocolType, useExpensiveSaferCasting).as(quotedName(alias)); } + // Redshift SUPER can silently cast an array type to struct and vice versa. return switch (type.getTypeName()) { case Struct.TYPE, UnsupportedOneOf.TYPE -> cast(field, getStructType()).as(quotedName(alias)); case Array.TYPE -> cast(field, getArrayType()).as(quotedName(alias)); // No nested Unions supported so this will definitely not result in infinite recursion. - case Union.TYPE -> castedField(field, ((Union) type).chooseType(), alias); + case Union.TYPE -> castedField(field, ((Union) type).chooseType(), alias, useExpensiveSaferCasting); default -> throw new IllegalArgumentException("Unsupported AirbyteType: " + type); }; } - protected Field castedField(final Field field, final AirbyteProtocolType type) { + protected Field castedField(final Field field, final AirbyteProtocolType type, final boolean useExpensiveSaferCasting) { return cast(field, toDialectType(type)); } diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java index 3f46cc2f28ea..a71711cf17c4 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcSqlGeneratorIntegrationTest.java @@ -12,6 +12,8 @@ import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA; import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_EMITTED_AT; import static io.airbyte.cdk.integrations.base.JavaBaseConstants.LEGACY_RAW_TABLE_COLUMNS; +import static org.jooq.impl.DSL.field; +import static org.jooq.impl.DSL.quotedName; import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.cdk.db.jdbc.JdbcDatabase; @@ -68,7 +70,7 @@ private void insertRecords(final Name tableName, final List columnNames, throws SQLException { InsertValuesStepN insert = getDslContext().insertInto( DSL.table(tableName), - columnNames.stream().map(DSL::field).toList()); + columnNames.stream().map(columnName -> field(quotedName(columnName))).toList()); for (final JsonNode record : records) { insert = insert.values( columnNames.stream() diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.java 
b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.java
new file mode 100644
index 000000000000..f77448d62170
--- /dev/null
+++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/java/io/airbyte/cdk/integrations/standardtest/destination/typing_deduping/JdbcTypingDedupingTest.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.cdk.integrations.standardtest.destination.typing_deduping;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
+import io.airbyte.cdk.db.factory.DataSourceFactory;
+import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase;
+import io.airbyte.cdk.db.jdbc.JdbcDatabase;
+import io.airbyte.cdk.db.jdbc.JdbcUtils;
+import io.airbyte.cdk.integrations.base.JavaBaseConstants;
+import io.airbyte.integrations.base.destination.typing_deduping.BaseTypingDedupingTest;
+import io.airbyte.integrations.base.destination.typing_deduping.StreamId;
+import java.util.List;
+import javax.sql.DataSource;
+import org.jooq.impl.DSL;
+
+/**
+ * This class is largely the same as
+ * {@link io.airbyte.integrations.destination.snowflake.typing_deduping.AbstractSnowflakeTypingDedupingTest}.
+ * But (a) it uses jooq to construct the sql statements, and (b) it doesn't need to upcase anything.
+ * At some point we may want to refactor the two into one.
+ */
+public abstract class JdbcTypingDedupingTest extends BaseTypingDedupingTest {
+
+  private JdbcDatabase database;
+  private DataSource dataSource;
+
+  /**
+   * Get the config as declared in GSM (or directly from the testcontainer). This class will further
+   * modify the config to ensure test isolation.
+   */
+  protected abstract ObjectNode getBaseConfig();
+
+  protected abstract DataSource getDataSource(JsonNode config);
+
+  /**
+   * Subclasses may need to return a custom source operations if the default one does not handle
+   * vendor-specific types correctly. For example, you most likely need to override this method to
+   * deserialize JSON columns to JsonNode.
+   */
+  protected JdbcCompatibleSourceOperations<?> getSourceOperations() {
+    return JdbcUtils.getDefaultSourceOperations();
+  }
+
+  /**
+   * Subclasses using a config with a nonstandard raw table schema should override this method.
+   */
+  protected String getRawSchema() {
+    return JavaBaseConstants.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE;
+  }
+
+  /**
+   * Subclasses using a config where the default schema is not in the {@code schema} key should
+   * override this method and {@link #setDefaultSchema(JsonNode, String)}.
+   */
+  protected String getDefaultSchema(final JsonNode config) {
+    return config.get("schema").asText();
+  }
+
+  /**
+   * Subclasses using a config where the default schema is not in the {@code schema} key should
+   * override this method and {@link #getDefaultSchema(JsonNode)}.
+   */
+  protected void setDefaultSchema(final JsonNode config, final String schema) {
+    ((ObjectNode) config).put("schema", schema);
+  }
+
+  @Override
+  protected JsonNode generateConfig() {
+    final JsonNode config = getBaseConfig();
+    setDefaultSchema(config, "typing_deduping_default_schema" + getUniqueSuffix());
+    dataSource = getDataSource(config);
+    database = new DefaultJdbcDatabase(dataSource, getSourceOperations());
+    return config;
+  }
+
+  @Override
+  protected List<JsonNode> dumpRawTableRecords(String streamNamespace, final String streamName) throws Exception {
+    if (streamNamespace == null) {
+      streamNamespace = getDefaultSchema(getConfig());
+    }
+    final String tableName = StreamId.concatenateRawTableName(streamNamespace, streamName);
+    final String schema = getRawSchema();
+    return database.queryJsons(DSL.selectFrom(DSL.name(schema, tableName)).getSQL());
+  }
+
+  @Override
+  protected List<JsonNode> dumpFinalTableRecords(String streamNamespace, final String streamName) throws Exception {
+    if (streamNamespace == null) {
+      streamNamespace = getDefaultSchema(getConfig());
+    }
+    return database.queryJsons(DSL.selectFrom(DSL.name(streamNamespace, streamName)).getSQL());
+  }
+
+  @Override
+  protected void teardownStreamAndNamespace(String streamNamespace, final String streamName) throws Exception {
+    if (streamNamespace == null) {
+      streamNamespace = getDefaultSchema(getConfig());
+    }
+    database.execute(DSL.dropTableIfExists(DSL.name(getRawSchema(), StreamId.concatenateRawTableName(streamNamespace, streamName))).getSQL());
+    database.execute(DSL.dropSchemaIfExists(DSL.name(streamNamespace)).cascade().getSQL());
+  }
+
+  @Override
+  protected void globalTeardown() throws Exception {
+    DataSourceFactory.close(dataSource);
+  }
+
+}
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.java b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.java
index 9fff9fd8e116..d01f47060ba4 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.java
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/java/io/airbyte/integrations/base/destination/typing_deduping/DefaultTyperDeduper.java
@@ -121,6 +121,7 @@ public void prepareTables() throws Exception {
     }
     overwriteStreamsWithTmpTable = ConcurrentHashMap.newKeySet();
     LOGGER.info("Preparing tables");
+    prepareSchemas(parsedCatalog);
     final Set<CompletableFuture<Optional<Exception>>> prepareTablesTasks = new HashSet<>();
     for (final StreamConfig stream : parsedCatalog.streams()) {
diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/java/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.java b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/java/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.java
index e094930853bf..cfc7eae3fa8a 100644
--- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/java/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.java
+++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/java/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.java
@@ -194,6 +194,7 @@ protected Map<String, String> getFinalMetadataColumnNames() {
   public void setup() throws Exception {
     generator = getSqlGenerator();
     destinationHandler =
getDestinationHandler(); + final ColumnId id1 = generator.buildColumnId("id1"); final ColumnId id2 = generator.buildColumnId("id2"); primaryKey = List.of(id1, id2); @@ -425,6 +426,26 @@ public void allTypes() throws Exception { assertFalse(destinationHandler.isFinalTableEmpty(streamId), "Final table should not be empty after T+D"); } + /** + * Run a basic test to verify that we don't throw an exception on basic data values. + */ + @Test + public void allTypesUnsafe() throws Exception { + createRawTable(streamId); + createFinalTable(incrementalDedupStream, ""); + insertRawTableRecords( + streamId, + BaseTypingDedupingTest.readRecords("sqlgenerator/alltypes_unsafe_inputrecords.jsonl")); + + assertTrue(destinationHandler.isFinalTableEmpty(streamId), "Final table should be empty before T+D"); + + // Instead of using the full T+D transaction, explicitly run with useSafeCasting=false. + final Sql unsafeSql = generator.updateTable(incrementalDedupStream, "", Optional.empty(), false); + destinationHandler.execute(unsafeSql); + + assertFalse(destinationHandler.isFinalTableEmpty(streamId), "Final table should not be empty after T+D"); + } + /** * Run through some plausible T+D scenarios to verify that we correctly identify the min raw * timestamp. diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl new file mode 100644 index 000000000000..55a509408d14 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl @@ -0,0 +1,3 @@ +// this is a strict subset of the alltypes_inputrecords file. All these records have valid values, i.e. can be processed with unsafe casting. 
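+// each line is one raw-table record: _airbyte_raw_id, _airbyte_extracted_at, and the source payload under _airbyte_data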
+{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} diff --git a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/build.gradle b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/build.gradle index 11cfb6f26b78..4f56294fe85e 100644 --- a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/build.gradle +++ b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/build.gradle @@ -4,7 +4,7 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.12.0' + cdkVersionRequired = '0.13.0' features = [ 'db-sources', // required for tests 'db-destinations' diff --git a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/gradle.properties b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/gradle.properties index 2b147dcf7175..4dbe8b8729df 100644 --- a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/gradle.properties +++ b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/gradle.properties @@ -1,3 +1 @@ -# currently limit the number of parallel threads until further investigation into the issues \ -# where integration tests run into race conditions -testExecutionConcurrency=1 +testExecutionConcurrency=-1 diff --git a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/metadata.yaml b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/metadata.yaml index 9cd1928961ec..d4f379fdbe5c 100644 --- a/airbyte-integrations/connectors/destination-postgres-strict-encrypt/metadata.yaml +++ b/airbyte-integrations/connectors/destination-postgres-strict-encrypt/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: database connectorType: destination definitionId: 25c5221d-dce2-4163-ade9-739ef790f503 - dockerImageTag: 0.5.3 + dockerImageTag: 0.5.4 dockerRepository: airbyte/destination-postgres-strict-encrypt githubIssueLabel: destination-postgres icon: postgresql.svg diff --git a/airbyte-integrations/connectors/destination-postgres/build.gradle b/airbyte-integrations/connectors/destination-postgres/build.gradle index ed2e3d3ffcbd..e5c75a6632c4 100644 --- a/airbyte-integrations/connectors/destination-postgres/build.gradle +++ b/airbyte-integrations/connectors/destination-postgres/build.gradle @@ -4,7 +4,7 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.12.0' + cdkVersionRequired = '0.13.0' features = [ 'db-sources', // required for tests 'db-destinations', diff --git a/airbyte-integrations/connectors/destination-postgres/gradle.properties b/airbyte-integrations/connectors/destination-postgres/gradle.properties index 2b147dcf7175..4dbe8b8729df 
100644 --- a/airbyte-integrations/connectors/destination-postgres/gradle.properties +++ b/airbyte-integrations/connectors/destination-postgres/gradle.properties @@ -1,3 +1 @@ -# currently limit the number of parallel threads until further investigation into the issues \ -# where integration tests run into race conditions -testExecutionConcurrency=1 +testExecutionConcurrency=-1 diff --git a/airbyte-integrations/connectors/destination-postgres/metadata.yaml b/airbyte-integrations/connectors/destination-postgres/metadata.yaml index 601a751d1cee..23020e463a0b 100644 --- a/airbyte-integrations/connectors/destination-postgres/metadata.yaml +++ b/airbyte-integrations/connectors/destination-postgres/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: database connectorType: destination definitionId: 25c5221d-dce2-4163-ade9-739ef790f503 - dockerImageTag: 0.5.3 + dockerImageTag: 0.5.4 dockerRepository: airbyte/destination-postgres documentationUrl: https://docs.airbyte.com/integrations/destinations/postgres githubIssueLabel: destination-postgres diff --git a/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresDestination.java b/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresDestination.java index 78f4709cfefe..3a2a36ce446b 100644 --- a/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresDestination.java +++ b/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresDestination.java @@ -12,6 +12,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.Destination; @@ -20,6 +21,7 @@ import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination; import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.postgres.typing_deduping.PostgresSqlGenerator; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.HashMap; @@ -42,6 +44,28 @@ public PostgresDestination() { super(DRIVER_CLASS, new PostgresSQLNameTransformer(), new PostgresSqlOperations()); } + @Override + protected DataSourceFactory.DataSourceBuilder modifyDataSourceBuilder(final DataSourceFactory.DataSourceBuilder builder) { + // Anything in the pg_temp schema is only visible to the connection that created it. + // So this creates an airbyte_safe_cast function that only exists for the duration of + // a single connection. + // This avoids issues with creating the same function concurrently (e.g. if multiple syncs run + // at the same time). 
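+    // For illustration (the bigint target here is hypothetical):
+    //   SELECT pg_temp.airbyte_safe_cast('42', NULL::bigint)           -> 42
+    //   SELECT pg_temp.airbyte_safe_cast('not a number', NULL::bigint) -> NULL (instead of an error)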
+    // Function definition copied from https://dba.stackexchange.com/a/203986
+    return builder.withConnectionInitSql("""
+        CREATE FUNCTION pg_temp.airbyte_safe_cast(_in text, INOUT _out ANYELEMENT)
+        LANGUAGE plpgsql AS
+        $func$
+        BEGIN
+          EXECUTE format('SELECT %L::%s', $1, pg_typeof(_out))
+          INTO _out;
+        EXCEPTION WHEN others THEN
+          -- do nothing: _out already carries default
+        END
+        $func$;
+        """);
+  }
+
 @Override
 protected Map<String, String> getDefaultConnectionProperties(final JsonNode config) {
   final Map<String, String> additionalParameters = new HashMap<>();
@@ -68,7 +92,7 @@ public JsonNode toJdbcConfig(final JsonNode config) {
     if (encodedDatabase != null) {
       try {
         encodedDatabase = URLEncoder.encode(encodedDatabase, "UTF-8");
-      } catch (UnsupportedEncodingException e) {
+      } catch (final UnsupportedEncodingException e) {
         // Should never happen
         e.printStackTrace();
       }
@@ -96,7 +120,7 @@ public JsonNode toJdbcConfig(final JsonNode config) {
   @Override
   protected JdbcSqlGenerator getSqlGenerator() {
-    throw new UnsupportedOperationException("PostgresDestination#getSqlGenerator is not implemented");
+    return new PostgresSqlGenerator(new PostgresSQLNameTransformer());
   }
 
   public static void main(final String[] args) throws Exception {
diff --git a/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresSqlOperations.java b/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresSqlOperations.java
index 3cf90d4d0a0d..43236bf65d1d 100644
--- a/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresSqlOperations.java
+++ b/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/PostgresSqlOperations.java
@@ -25,9 +25,13 @@ public PostgresSqlOperations() {
   }
 
   @Override
-  protected void insertRecordsInternalV2(JdbcDatabase database, List<PartialAirbyteMessage> records, String schemaName, String tableName)
+  protected void insertRecordsInternalV2(final JdbcDatabase database,
+                                         final List<PartialAirbyteMessage> records,
+                                         final String schemaName,
+                                         final String tableName)
       throws Exception {
-    throw new UnsupportedOperationException("PostgresSqlOperations#insertRecordsInternalV2 is not implemented");
+    // The V1 insert path handles the V2 raw table as well, so simply delegate to it.
+    insertRecordsInternal(database, records, schemaName, tableName);
   }
 
   @Override
diff --git a/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGenerator.java b/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGenerator.java
new file mode 100644
index 000000000000..2d6469192ed1
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-postgres/src/main/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGenerator.java
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.postgres.typing_deduping;
+
+import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT;
+import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_LOADED_AT;
+import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_META;
+import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA;
+import static java.util.Collections.emptyList;
+import static org.jooq.impl.DSL.array;
+import static org.jooq.impl.DSL.case_;
+import static org.jooq.impl.DSL.cast;
+import static org.jooq.impl.DSL.field;
+import static org.jooq.impl.DSL.function;
+import static org.jooq.impl.DSL.name;
+import static org.jooq.impl.DSL.quotedName;
+import static org.jooq.impl.DSL.rowNumber;
+import static org.jooq.impl.DSL.val;
+
+import com.google.common.collect.ImmutableMap;
+import io.airbyte.cdk.integrations.base.JavaBaseConstants;
+import io.airbyte.cdk.integrations.destination.NamingConventionTransformer;
+import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition;
+import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator;
+import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType;
+import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType;
+import io.airbyte.integrations.base.destination.typing_deduping.Array;
+import io.airbyte.integrations.base.destination.typing_deduping.ColumnId;
+import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig;
+import io.airbyte.integrations.base.destination.typing_deduping.Struct;
+import io.airbyte.protocol.models.v0.DestinationSyncMode;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import org.jooq.Condition;
+import org.jooq.DataType;
+import org.jooq.Field;
+import org.jooq.SQLDialect;
+import org.jooq.impl.DefaultDataType;
+import org.jooq.impl.SQLDataType;
+
+public class PostgresSqlGenerator extends JdbcSqlGenerator {
+
+  public static final DataType<Object> JSONB_TYPE = new DefaultDataType<>(null, Object.class, "jsonb");
+
+  private static final Map<String, String> POSTGRES_TYPE_NAME_TO_JDBC_TYPE = ImmutableMap.of(
+      "numeric", "decimal",
+      "int8", "bigint",
+      "bool", "boolean",
+      "timestamptz", "timestamp with time zone",
+      "timetz", "time with time zone");
+
+  public PostgresSqlGenerator(final NamingConventionTransformer namingTransformer) {
+    super(namingTransformer);
+  }
+
+  @Override
+  protected DataType<?> getStructType() {
+    return JSONB_TYPE;
+  }
+
+  @Override
+  protected DataType<?> getArrayType() {
+    return JSONB_TYPE;
+  }
+
+  @Override
+  protected DataType<?> getWidestType() {
+    return JSONB_TYPE;
+  }
+
+  @Override
+  protected SQLDialect getDialect() {
+    return SQLDialect.POSTGRES;
+  }
+
+  @Override
+  protected List<String> createIndexSql(final StreamConfig stream, final String suffix) {
+    if (stream.destinationSyncMode() == DestinationSyncMode.APPEND_DEDUP && !stream.primaryKey().isEmpty()) {
+      return List.of(
+          getDslContext().createIndex().on(
+              name(stream.id().finalNamespace(), stream.id().finalName() + suffix),
+              stream.primaryKey().stream()
+                  .map(pk -> quotedName(pk.name()))
+                  .toList())
+              .getSQL());
+    } else {
+      return emptyList();
+    }
+  }
+
+  @Override
+  protected List<Field<?>> extractRawDataFields(final LinkedHashMap<ColumnId, AirbyteType> columns, final boolean useExpensiveSaferCasting) {
+    return columns
+        .entrySet()
+        .stream()
+        .map(column ->
castedField( + extractColumnAsJson(column.getKey()), + column.getValue(), + column.getKey().name(), + useExpensiveSaferCasting)) + .collect(Collectors.toList()); + } + + @Override + protected Field castedField( + final Field field, + final AirbyteType type, + final String alias, + final boolean useExpensiveSaferCasting) { + return castedField(field, type, useExpensiveSaferCasting).as(quotedName(alias)); + } + + protected Field castedField( + final Field field, + final AirbyteType type, + final boolean useExpensiveSaferCasting) { + if (type instanceof Struct) { + // If this field is a struct, verify that the raw data is an object. + return cast( + case_() + .when(field.isNull().or(jsonTypeof(field).ne("object")), val((Object) null)) + .else_(field), + JSONB_TYPE); + } else if (type instanceof Array) { + // Do the same for arrays. + return cast( + case_() + .when(field.isNull().or(jsonTypeof(field).ne("array")), val((Object) null)) + .else_(field), + JSONB_TYPE); + } else if (type == AirbyteProtocolType.UNKNOWN) { + return cast(field, JSONB_TYPE); + } else if (type == AirbyteProtocolType.STRING) { + // we need to render the jsonb to a normal string. For strings, this is the difference between + // "\"foo\"" and "foo". + // postgres provides the #>> operator, which takes a json path and returns that extraction as a + // string. + // '{}' is an empty json path (it's an empty array literal), so it just stringifies the json value. + return field("{0} #>> '{}'", String.class, field); + } else { + final DataType dialectType = toDialectType(type); + // jsonb can't directly cast to most types, so convert to text first. + // also convert jsonb null to proper sql null. + final Field extractAsText = case_() + .when(field.isNull().or(jsonTypeof(field).eq("null")), val((String) null)) + .else_(cast(field, SQLDataType.VARCHAR)); + if (useExpensiveSaferCasting) { + return function(name("pg_temp", "airbyte_safe_cast"), dialectType, extractAsText, cast(val((Object) null), dialectType)); + } else { + return cast(extractAsText, dialectType); + } + } + } + + // TODO this isn't actually used right now... can we refactor this out? + // (redshift is doing something interesting with this method, so leaving it for now) + @Override + protected Field castedField(final Field field, final AirbyteProtocolType type, final boolean useExpensiveSaferCasting) { + return cast(field, toDialectType(type)); + } + + @Override + protected Field buildAirbyteMetaColumn(final LinkedHashMap columns) { + final Field[] dataFieldErrors = columns + .entrySet() + .stream() + .map(column -> toCastingErrorCaseStmt(column.getKey(), column.getValue())) + .toArray(Field[]::new); + return function( + "JSONB_BUILD_OBJECT", + JSONB_TYPE, + val("errors"), + function("ARRAY_REMOVE", JSONB_TYPE, array(dataFieldErrors), val((String) null))).as(COLUMN_NAME_AB_META); + } + + private Field toCastingErrorCaseStmt(final ColumnId column, final AirbyteType type) { + final Field extract = extractColumnAsJson(column); + if (type instanceof Struct) { + // If this field is a struct, verify that the raw data is an object or null. + return case_() + .when( + extract.isNotNull() + .and(jsonTypeof(extract).notIn("object", "null")), + val("Problem with `" + column.originalName() + "`")) + .else_(val((String) null)); + } else if (type instanceof Array) { + // Do the same for arrays. 
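+      // (Postgres JSONB_TYPEOF returns one of 'object', 'array', 'string', 'number', 'boolean', or 'null'.)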
+      return case_()
+          .when(
+              extract.isNotNull()
+                  .and(jsonTypeof(extract).notIn("array", "null")),
+              val("Problem with `" + column.originalName() + "`"))
+          .else_(val((String) null));
+    } else if (type == AirbyteProtocolType.UNKNOWN || type == AirbyteProtocolType.STRING) {
+      // Unknown types require no casting, so there's never an error.
+      // Similarly, everything can cast to string without error.
+      return val((String) null);
+    } else {
+      // For other types: if the raw data is not NULL or 'null' but the casted data is NULL,
+      // then we have a typing error.
+      return case_()
+          .when(
+              extract.isNotNull()
+                  .and(jsonTypeof(extract).ne("null"))
+                  .and(castedField(extract, type, true).isNull()),
+              val("Problem with `" + column.originalName() + "`"))
+          .else_(val((String) null));
+    }
+  }
+
+  @Override
+  protected Condition cdcDeletedAtNotNullCondition() {
+    return field(name(COLUMN_NAME_AB_LOADED_AT)).isNotNull()
+        .and(jsonTypeof(extractColumnAsJson(cdcDeletedAtColumn)).ne("null"));
+  }
+
+  @Override
+  protected Field<Integer> getRowNumber(final List<ColumnId> primaryKeys, final Optional<ColumnId> cursor) {
+    // Identical to Redshift's getRowNumber implementation; changes here should be mirrored there.
+    final List<Field<?>> primaryKeyFields =
+        primaryKeys != null ? primaryKeys.stream().map(columnId -> field(quotedName(columnId.name()))).collect(Collectors.toList())
+            : new ArrayList<>();
+    final List<Field<?>> orderedFields = new ArrayList<>();
+    // We can still use Jooq's field to get the quoted name with raw sql templating.
+    // jooq's .desc returns SortField instead of Field and NULLS LAST doesn't work with it
+    cursor.ifPresent(columnId -> orderedFields.add(field("{0} desc NULLS LAST", field(quotedName(columnId.name())))));
+    orderedFields.add(field("{0} desc", quotedName(COLUMN_NAME_AB_EXTRACTED_AT)));
+    return rowNumber()
+        .over()
+        .partitionBy(primaryKeyFields)
+        .orderBy(orderedFields).as(ROW_NUMBER_COLUMN_NAME);
+  }
+
+  @Override
+  public boolean existingSchemaMatchesStreamConfig(final StreamConfig stream, final TableDefinition existingTable) {
+    // Check that the columns match, with special handling for the metadata columns.
+    // This is mostly identical to the redshift implementation, but swaps super to jsonb.
+    final LinkedHashMap<String, String> intendedColumns = stream.columns().entrySet().stream()
+        .collect(LinkedHashMap::new,
+            (map, column) -> map.put(column.getKey().name(), toDialectType(column.getValue()).getTypeName()),
+            LinkedHashMap::putAll);
+    final LinkedHashMap<String, String> actualColumns = existingTable.columns().entrySet().stream()
+        .filter(column -> JavaBaseConstants.V2_FINAL_TABLE_METADATA_COLUMNS.stream()
+            .noneMatch(airbyteColumnName -> airbyteColumnName.equals(column.getKey())))
+        .collect(LinkedHashMap::new,
+            (map, column) -> map.put(column.getKey(), jdbcTypeNameFromPostgresTypeName(column.getValue().type())),
+            LinkedHashMap::putAll);
+
+    final boolean sameColumns = actualColumns.equals(intendedColumns)
+        && "varchar".equals(existingTable.columns().get(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID).type())
+        && "timestamptz".equals(existingTable.columns().get(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT).type())
+        && "jsonb".equals(existingTable.columns().get(JavaBaseConstants.COLUMN_NAME_AB_META).type());
+
+    return sameColumns;
+  }
+
+  /**
+   * Extract a raw field, leaving it as jsonb
+   */
+  private Field<Object> extractColumnAsJson(final ColumnId column) {
+    return field("{0} -> {1}", name(COLUMN_NAME_DATA), val(column.originalName()));
+  }
+
+  private Field<String> jsonTypeof(final Field<?> field) {
+    return function("JSONB_TYPEOF", SQLDataType.VARCHAR, field);
+  }
+
+  private static String jdbcTypeNameFromPostgresTypeName(final String postgresType) {
+    return POSTGRES_TYPE_NAME_TO_JDBC_TYPE.getOrDefault(postgresType, postgresType);
+  }
+
+}
diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresRawOverrideTypingDedupingTest.java b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresRawOverrideTypingDedupingTest.java
new file mode 100644
index 000000000000..f31c3325d226
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresRawOverrideTypingDedupingTest.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.postgres.typing_deduping;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+public class PostgresRawOverrideTypingDedupingTest extends PostgresTypingDedupingTest {
+
+  @Override
+  protected ObjectNode getBaseConfig() {
+    return super.getBaseConfig()
+        .put("raw_data_schema", "overridden_raw_dataset");
+  }
+
+  @Override
+  protected String getRawSchema() {
+    return "overridden_raw_dataset";
+  }
+
+}
diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGeneratorIntegrationTest.java
new file mode 100644
index 000000000000..ee80c3e12ab5
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresSqlGeneratorIntegrationTest.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.integrations.destination.postgres.typing_deduping; + +import static io.airbyte.integrations.destination.postgres.typing_deduping.PostgresSqlGenerator.JSONB_TYPE; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcSourceOperations; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition; +import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcDestinationHandler; +import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; +import io.airbyte.cdk.integrations.standardtest.destination.typing_deduping.JdbcSqlGeneratorIntegrationTest; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.destination.typing_deduping.DestinationHandler; +import io.airbyte.integrations.base.destination.typing_deduping.Sql; +import io.airbyte.integrations.destination.postgres.PostgresDestination; +import io.airbyte.integrations.destination.postgres.PostgresSQLNameTransformer; +import io.airbyte.integrations.destination.postgres.PostgresTestDatabase; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Optional; +import javax.sql.DataSource; +import org.jooq.DataType; +import org.jooq.Field; +import org.jooq.SQLDialect; +import org.jooq.impl.DSL; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class PostgresSqlGeneratorIntegrationTest extends JdbcSqlGeneratorIntegrationTest { + + private static PostgresTestDatabase testContainer; + private static String databaseName; + private static JdbcDatabase database; + + /** + * See + * {@link io.airbyte.integrations.destination.redshift.typing_deduping.RedshiftSqlGeneratorIntegrationTest.RedshiftSourceOperations}. + * Copied here to avoid weird dependencies. 
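+   * (These overrides matter because JDBCType has no jsonb entry and the driver reports timetz and
+   * timestamptz under their timezoneless equivalents, as the comments in the switch below note.)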
+   */
+  public static class PostgresSourceOperations extends JdbcSourceOperations {
+
+    @Override
+    public void copyToJsonField(final ResultSet resultSet, final int colIndex, final ObjectNode json) throws SQLException {
+      final String columnName = resultSet.getMetaData().getColumnName(colIndex);
+      final String columnTypeName = resultSet.getMetaData().getColumnTypeName(colIndex).toLowerCase();
+
+      switch (columnTypeName) {
+        // JSONB has no equivalent in JDBCType
+        case "jsonb" -> json.set(columnName, Jsons.deserializeExact(resultSet.getString(colIndex)));
+        // For some reason, the driver maps these to their timezoneless equivalents (TIME and TIMESTAMP)
+        case "timetz" -> putTimeWithTimezone(json, columnName, resultSet, colIndex);
+        case "timestamptz" -> putTimestampWithTimezone(json, columnName, resultSet, colIndex);
+        default -> super.copyToJsonField(resultSet, colIndex, json);
+      }
+    }
+
+  }
+
+  @BeforeAll
+  public static void setupPostgres() {
+    testContainer = PostgresTestDatabase.in(PostgresTestDatabase.BaseImage.POSTGRES_13);
+    final JsonNode config = testContainer.configBuilder()
+        .with("schema", "public")
+        .withDatabase()
+        .withHostAndPort()
+        .withCredentials()
+        .withoutSsl()
+        .build();
+
+    databaseName = config.get(JdbcUtils.DATABASE_KEY).asText();
+    final PostgresDestination postgresDestination = new PostgresDestination();
+    final DataSource dataSource = postgresDestination.getDataSource(config);
+    database = new DefaultJdbcDatabase(dataSource, new PostgresSourceOperations());
+  }
+
+  @AfterAll
+  public static void teardownPostgres() {
+    testContainer.close();
+  }
+
+  @Override
+  protected JdbcDatabase getDatabase() {
+    return database;
+  }
+
+  @Override
+  protected DataType<?> getStructType() {
+    return JSONB_TYPE;
+  }
+
+  @Override
+  protected JdbcSqlGenerator getSqlGenerator() {
+    return new PostgresSqlGenerator(new PostgresSQLNameTransformer());
+  }
+
+  @Override
+  protected DestinationHandler<TableDefinition> getDestinationHandler() {
+    return new JdbcDestinationHandler(databaseName, database);
+  }
+
+  @Override
+  protected SQLDialect getSqlDialect() {
+    return SQLDialect.POSTGRES;
+  }
+
+  @Override
+  protected Field<?> toJsonValue(final String valueAsString) {
+    return DSL.cast(DSL.val(valueAsString), JSONB_TYPE);
+  }
+
+  @Test
+  @Override
+  public void testCreateTableIncremental() throws Exception {
+    final Sql sql = generator.createTable(incrementalDedupStream, "", false);
+    destinationHandler.execute(sql);
+
+    final Optional<TableDefinition> existingTable = destinationHandler.findExistingTable(incrementalDedupStream.id());
+
+    assertTrue(existingTable.isPresent());
+    assertAll(
+        () -> assertEquals("varchar", existingTable.get().columns().get("_airbyte_raw_id").type()),
+        () -> assertEquals("timestamptz", existingTable.get().columns().get("_airbyte_extracted_at").type()),
+        () -> assertEquals("jsonb", existingTable.get().columns().get("_airbyte_meta").type()),
+        () -> assertEquals("int8", existingTable.get().columns().get("id1").type()),
+        () -> assertEquals("int8", existingTable.get().columns().get("id2").type()),
+        () -> assertEquals("timestamptz", existingTable.get().columns().get("updated_at").type()),
+        () -> assertEquals("jsonb", existingTable.get().columns().get("struct").type()),
+        () -> assertEquals("jsonb", existingTable.get().columns().get("array").type()),
+        () -> assertEquals("varchar", existingTable.get().columns().get("string").type()),
+        () -> assertEquals("numeric", existingTable.get().columns().get("number").type()),
+        () -> assertEquals("int8", existingTable.get().columns().get("integer").type()),
+        () -> assertEquals("bool", existingTable.get().columns().get("boolean").type()),
+        () -> assertEquals("timestamptz", existingTable.get().columns().get("timestamp_with_timezone").type()),
+        () -> assertEquals("timestamp", existingTable.get().columns().get("timestamp_without_timezone").type()),
+        () -> assertEquals("timetz", existingTable.get().columns().get("time_with_timezone").type()),
+        () -> assertEquals("time", existingTable.get().columns().get("time_without_timezone").type()),
+        () -> assertEquals("date", existingTable.get().columns().get("date").type()),
+        () -> assertEquals("jsonb", existingTable.get().columns().get("unknown").type()));
+    // TODO assert on table indexing, etc.
+  }
+
+}
diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresTypingDedupingTest.java b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresTypingDedupingTest.java
new file mode 100644
index 000000000000..dbcb13a67781
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/java/io/airbyte/integrations/destination/postgres/typing_deduping/PostgresTypingDedupingTest.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.postgres.typing_deduping;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
+import io.airbyte.cdk.integrations.standardtest.destination.typing_deduping.JdbcTypingDedupingTest;
+import io.airbyte.integrations.base.destination.typing_deduping.SqlGenerator;
+import io.airbyte.integrations.destination.postgres.PostgresDestination;
+import io.airbyte.integrations.destination.postgres.PostgresSQLNameTransformer;
+import io.airbyte.integrations.destination.postgres.PostgresTestDatabase;
+import javax.sql.DataSource;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+
+public class PostgresTypingDedupingTest extends JdbcTypingDedupingTest {
+
+  protected static PostgresTestDatabase testContainer;
+
+  @BeforeAll
+  public static void setupPostgres() {
+    testContainer = PostgresTestDatabase.in(PostgresTestDatabase.BaseImage.POSTGRES_13);
+  }
+
+  @AfterAll
+  public static void teardownPostgres() {
+    testContainer.close();
+  }
+
+  @Override
+  protected ObjectNode getBaseConfig() {
+    final ObjectNode config = (ObjectNode) testContainer.configBuilder()
+        .with("schema", "public")
+        .withDatabase()
+        .withResolvedHostAndPort()
+        .withCredentials()
+        .withoutSsl()
+        .build();
+    return config.put("use_1s1t_format", true);
+  }
+
+  @Override
+  protected DataSource getDataSource(final JsonNode config) {
+    // Intentionally ignore the config and rebuild it.
+    // The config param has the resolved (i.e. in-docker) host/port.
+    // We need the unresolved host/port since the test wrapper code is running from the docker host
+    // rather than in a container.
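+    // (Concretely: getBaseConfig() above uses withResolvedHostAndPort(), while the rebuild below
+    // uses withHostAndPort() so the DataSource is reachable from the host machine.)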
+ return new PostgresDestination().getDataSource(testContainer.configBuilder() + .with("schema", "public") + .withDatabase() + .withHostAndPort() + .withCredentials() + .withoutSsl() + .build()); + } + + @Override + protected String getImageName() { + return "airbyte/destination-postgres:dev"; + } + + @Override + protected SqlGenerator getSqlGenerator() { + return new PostgresSqlGenerator(new PostgresSQLNameTransformer()); + } + + @Override + protected JdbcCompatibleSourceOperations getSourceOperations() { + return new PostgresSqlGeneratorIntegrationTest.PostgresSourceOperations(); + } + +} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl new file mode 100644 index 000000000000..9f11b2293a95 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "old_cursor": 1, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl new file mode 100644 index 000000000000..7f75f0f804e2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl @@ -0,0 +1,4 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl new file mode 100644 index 000000000000..c805113dc6c2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl @@ -0,0 +1,4 @@ +// Keep the 
Alice record with more recent updated_at +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00.000000Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00.000000Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl new file mode 100644 index 000000000000..b2bf47df66c1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl @@ -0,0 +1 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00.000000Z", "name": "Someone completely different"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl new file mode 100644 index 000000000000..8aa852183061 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00.000000Z", "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00.000000Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00.000000Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +// Invalid columns are nulled out (i.e. 
SQL null, not JSON null) +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..80fac124d28d --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +// Invalid data is still allowed in the raw table. +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl new file mode 100644 index 000000000000..b489accda1bb --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl @@ -0,0 +1 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl new file mode 100644 index 000000000000..c26d4a49aacd --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +// Charlie wasn't reemitted with updated_at, so it still has a null cursor +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl 
b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl new file mode 100644 index 000000000000..03f28e155af5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl @@ -0,0 +1,7 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl new file mode 100644 index 000000000000..6e9258bab255 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl @@ -0,0 +1,8 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00.000000Z", "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00.000000Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00.000000Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} + +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +{"_airbyte_extracted_at": 
"1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Bob", "address": {"city": "New York", "state": "NY"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00.000000Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00.000000Z"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl new file mode 100644 index 000000000000..9d1f1499469f --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Bob", "address": {"city": "New York", "state": "NY"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00.000000Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00.000000Z"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl new file mode 100644 index 000000000000..33bc3280be27 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl new file mode 100644 index 000000000000..13c59b2f9912 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +// Delete Bob, keep Charlie 
+{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl new file mode 100644 index 000000000000..53c304c89d31 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl @@ -0,0 +1 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2001-01-02T00:00:00.000000Z", "name": "Someone completely different v2"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..32a7e57b1c14 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl @@ -0,0 +1,9 @@ +// We keep the records from the first sync +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +// And append the records from the second sync +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl new file mode 100644 index 000000000000..88b8ee7746c1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl @@ -0,0 +1,2 @@ +{"_airbyte_extracted_at": 
"1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different v2"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl new file mode 100644 index 000000000000..76d0442ebe79 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl @@ -0,0 +1,8 @@ +{"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "unknown": null, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "unknown": null, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": ["Problem with `struct`", "Problem with `array`", "Problem with `number`", "Problem with `integer`", "Problem with `boolean`","Problem with `timestamp_with_timezone`", "Problem with `timestamp_without_timezone`", "Problem with `time_with_timezone`","Problem with `time_without_timezone`", "Problem with `date`"]}} +// Note that for numbers where we parse the value to JSON (struct, array, unknown) we lose precision. +// But for numbers where we create a NUMBER column, we do not lose precision (see the `number` column). 
+{"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "number": 67.174118, "struct": {"nested_number": 67.174118}, "array": [67.174118], "unknown": 67.174118, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "IamACaseSensitiveColumnName": "Case senstive value", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..6b99169ececf --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl @@ -0,0 +1,6 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}} +{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} +{"_airbyte_raw_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "number": 67.174118, "struct": {"nested_number": 67.174118}, "array": [67.174118], "unknown": 67.174118}} +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fce", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "IamACaseSensitiveColumnName": "Case senstive value"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl new file mode 100644 index 000000000000..5842f7b37e42 --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl @@ -0,0 +1,2 @@ +{"_airbyte_raw_id": "80c99b54-54b4-43bd-b51b-1f67dafa2c52", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00.000000Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84} +{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": ["Problem with `integer`"]}, "id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00.000000Z", "string": "Bob"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..63569975abc2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_raw_id": "d7b81af0-01da-4846-a650-cc398986bc99", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}} +{"_airbyte_raw_id": "80c99b54-54b4-43bd-b51b-1f67dafa2c52", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}} +{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl new file mode 100644 index 000000000000..edcc0cc462d6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl @@ -0,0 +1,5 @@ +{"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "[\"I\", \"am\", \"an\", \"array\"]", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "{\"I\": \"am\", \"an\": \"object\"}", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": 
"2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "true", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "3.14", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "I am a valid json string", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..5c10203c7837 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": ["I", "am", "an", "array"], "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": {"I": "am", "an": "object"}, "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": true, "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": 
"2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": 3.14, "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "I am a valid json string", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl new file mode 100644 index 000000000000..4ecd95d83b63 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..cd7c03aba677 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {}} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl new file mode 100644 index 000000000000..b34ad054ab33 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id":"b2e0efc4-38a8-47ba-970c-8103f09f08d5","_airbyte_extracted_at":"2023-01-01T00:00:00.000000Z","_airbyte_meta":{"errors":[]}, "current_date": "foo", "join": "bar"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl new file mode 100644 index 000000000000..78ded5f99d0e --- 
/dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl @@ -0,0 +1,16 @@ +// https://docs.aws.amazon.com/redshift/latest/dg/r_Datetime_types.html#r_Datetime_types-timetz +// TIME, TIMETZ, TIMESTAMP, TIMESTAMPTZ values are UTC in user tables. +// Note that redshift stores precision to microseconds. Java deserialization in tests preserves them only for non-zero values +// except for timestamp with time zone where Z is required at end for even zero values +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "time_with_timezone": "12:34:56Z"} +{"_airbyte_raw_id": "05028c5f-7813-4e9c-bd4b-387d1f8ba435", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T20:34:56.000000Z", "time_with_timezone": "12:34:56-08:00"} +{"_airbyte_raw_id": "95dfb0c6-6a67-4ba0-9935-643bebc90437", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T20:34:56.000000Z", "time_with_timezone": "12:34:56-08:00"} +{"_airbyte_raw_id": "f3d8abe2-bb0f-4caf-8ddc-0641df02f3a9", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T20:34:56.000000Z", "time_with_timezone": "12:34:56-08:00"} +{"_airbyte_raw_id": "a81ed40a-2a49-488d-9714-d53e8b052968", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T04:34:56.000000Z", "time_with_timezone": "12:34:56+08:00"} +{"_airbyte_raw_id": "c07763a0-89e6-4cb7-b7d0-7a34a7c9918a", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T04:34:56.000000Z", "time_with_timezone": "12:34:56+08:00"} +{"_airbyte_raw_id": "358d3b52-50ab-4e06-9094-039386f9bf0d", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T04:34:56.000000Z", "time_with_timezone": "12:34:56+08:00"} +{"_airbyte_raw_id": "db8200ac-b2b9-4b95-a053-8a0343042751", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T12:34:56.123000Z", "time_with_timezone": "12:34:56.123Z"} + +{"_airbyte_raw_id": "10ce5d93-6923-4217-a46f-103833837038", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_without_timezone": "2023-01-23T12:34:56", "time_without_timezone": "12:34:56", "date": "2023-01-23"} +// Bigquery returns 6 decimal places if there are any decimal places... 
but not for timestamp_with_timezone +{"_airbyte_raw_id": "a7a6e176-7464-4a0b-b55c-b4f936e8d5a1", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_without_timezone": "2023-01-23T12:34:56.123", "time_without_timezone": "12:34:56.123"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl new file mode 100644 index 000000000000..adfbd06d6a55 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl @@ -0,0 +1,9 @@ +// column renamings: +// * $starts_with_dollar_sign -> _starts_with_dollar_sign +// * includes"doublequote -> includes_doublequote +// * includes'singlequote -> includes_singlequote +// * includes`backtick -> includes_backtick +// * includes$$doubledollar -> includes__doubledollar +// * includes.period -> includes_period +// * endswithbackslash\ -> endswithbackslash_ +{"_airbyte_raw_id": "7e7330a1-42fb-41ec-a955-52f18bd61964", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00.000000Z", "_starts_with_dollar_sign": "foo", "includes_doublequote": "foo", "includes_singlequote": "foo", "includes_backtick": "foo", "includes_period": "foo", "includes__doubledollar": "foo", "endswithbackslash_": "foo"} diff --git a/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..2b602082a349 --- /dev/null +++ b/airbyte-integrations/connectors/destination-postgres/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "7e7330a1-42fb-41ec-a955-52f18bd61964", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00Z", "$starts_with_dollar_sign": "foo", "includes\"doublequote": "foo", "includes'singlequote": "foo", "includes`backtick": "foo", "includes.period": "foo", "includes$$doubledollar": "foo", "endswithbackslash\\": "foo"}} diff --git a/docs/integrations/destinations/postgres.md b/docs/integrations/destinations/postgres.md index 32dbef5d9243..454b399d0609 100644 --- a/docs/integrations/destinations/postgres.md +++ b/docs/integrations/destinations/postgres.md @@ -170,6 +170,7 @@ Now that you have set up the Postgres destination connector, check out the follo | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------| +| 0.5.4 | 2024-01-11 | [34177](https://github.com/airbytehq/airbyte/pull/34177) | Add code for DV2 beta (no user-visible changes) | | 0.5.3 | 2024-01-10 | [34135](https://github.com/airbytehq/airbyte/pull/34135) | Use published CDK missed in previous release | | 0.5.2 | 2024-01-08 | [33875](https://github.com/airbytehq/airbyte/pull/33875) | Update CDK to get Tunnel heartbeats feature | | 0.5.1 | 2024-01-04 | 
[33873](https://github.com/airbytehq/airbyte/pull/33873) | Install normalization to enable DV2 beta |
@@ -190,4 +191,4 @@ Now that you have set up the Postgres destination connector, check out the follo
| 0.3.13 | 2021-12-01 | [\#8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key |
| 0.3.12 | 2021-11-08 | [\#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count |
| 0.3.11 | 2021-09-07 | [\#5743](https://github.com/airbytehq/airbyte/pull/5743) | Add SSH Tunnel support |
-| 0.3.10 | 2021-08-11 | [\#5336](https://github.com/airbytehq/airbyte/pull/5336) | Destination Postgres: fix \u0000\(NULL\) value processing |
\ No newline at end of file
+| 0.3.10 | 2021-08-11 | [\#5336](https://github.com/airbytehq/airbyte/pull/5336) | Destination Postgres: fix \u0000\(NULL\) value processing |

From a9f2b2955dc4a745b7920022d1dec27e488b60a8 Mon Sep 17 00:00:00 2001
From: benmoriceau
Date: Thu, 18 Jan 2024 00:45:51 +0000
Subject: [PATCH 11/12] Bump Airbyte version from 0.50.43 to 0.50.44

---
 .bumpversion.cfg                          | 2 +-
 docs/operator-guides/upgrading-airbyte.md | 2 +-
 gradle.properties                         | 2 +-
 run-ab-platform.sh                        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 9a2f045a1537..57fbc2cdd463 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.50.43
+current_version = 0.50.44
 commit = False
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-[a-z]+)?
diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md
index 6029020327f4..4f73ce785940 100644
--- a/docs/operator-guides/upgrading-airbyte.md
+++ b/docs/operator-guides/upgrading-airbyte.md
@@ -128,7 +128,7 @@ If you are upgrading from (i.e. your current version of Airbyte is) Airbyte vers
   Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`.

   ```bash
-  docker run --rm -v /tmp:/config airbyte/migration:0.50.43 --\
+  docker run --rm -v /tmp:/config airbyte/migration:0.50.44 --\
   --input /config/airbyte_archive.tar.gz\
   --output /config/airbyte_archive_migrated.tar.gz
   ```
diff --git a/gradle.properties b/gradle.properties
index 4d7f4ccd2228..9d7fb27ae04c 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -1,4 +1,4 @@
-VERSION=0.50.43
+VERSION=0.50.44

 # NOTE: some of these values are overwritten in CI!
# NOTE: if you want to override this for your local machine, set overrides in ~/.gradle/gradle.properties diff --git a/run-ab-platform.sh b/run-ab-platform.sh index ffd721a29b64..2f347c8545d5 100755 --- a/run-ab-platform.sh +++ b/run-ab-platform.sh @@ -1,6 +1,6 @@ #!/bin/bash -VERSION=0.50.43 +VERSION=0.50.44 # Run away from anything even a little scary set -o nounset # -u exit if a variable is not set set -o errexit # -f exit for any command failure" From 1b0ad2403fd4236fb38f88c5a0e29861d8fa212b Mon Sep 17 00:00:00 2001 From: Evan Tahler Date: Wed, 17 Jan 2024 17:45:12 -0800 Subject: [PATCH 12/12] source-faker: unique state messages (#34344) --- .../connectors/source-faker/Dockerfile | 2 +- .../connectors/source-faker/metadata.yaml | 2 +- .../source-faker/source_faker/streams.py | 8 ++++---- docs/integrations/sources/faker.md | 18 ++++++++++++------ 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/source-faker/Dockerfile b/airbyte-integrations/connectors/source-faker/Dockerfile index e880e4f38bee..9db110142dbc 100644 --- a/airbyte-integrations/connectors/source-faker/Dockerfile +++ b/airbyte-integrations/connectors/source-faker/Dockerfile @@ -34,5 +34,5 @@ COPY source_faker ./source_faker ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=5.0.1 +LABEL io.airbyte.version=5.0.2 LABEL io.airbyte.name=airbyte/source-faker diff --git a/airbyte-integrations/connectors/source-faker/metadata.yaml b/airbyte-integrations/connectors/source-faker/metadata.yaml index fdd0575e480b..e228708b816e 100644 --- a/airbyte-integrations/connectors/source-faker/metadata.yaml +++ b/airbyte-integrations/connectors/source-faker/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: dfd88b22-b603-4c3d-aad7-3701784586b1 - dockerImageTag: 5.0.1 + dockerImageTag: 5.0.2 dockerRepository: airbyte/source-faker documentationUrl: https://docs.airbyte.com/integrations/sources/faker githubIssueLabel: source-faker diff --git a/airbyte-integrations/connectors/source-faker/source_faker/streams.py b/airbyte-integrations/connectors/source-faker/source_faker/streams.py index ba7d70b7dd2c..002866ba7c54 100644 --- a/airbyte-integrations/connectors/source-faker/source_faker/streams.py +++ b/airbyte-integrations/connectors/source-faker/source_faker/streams.py @@ -119,9 +119,9 @@ def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]: if records_remaining_this_loop == 0: break - self.state = {"seed": self.seed, "updated_at": updated_at} + self.state = {"seed": self.seed, "updated_at": updated_at, "loop_offset": loop_offset} - self.state = {"seed": self.seed, "updated_at": updated_at} + self.state = {"seed": self.seed, "updated_at": updated_at, "loop_offset": loop_offset} class Purchases(Stream, IncrementalMixin): @@ -180,6 +180,6 @@ def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]: if records_remaining_this_loop == 0: break - self.state = {"seed": self.seed, "updated_at": updated_at} + self.state = {"seed": self.seed, "updated_at": updated_at, "loop_offset": loop_offset} - self.state = {"seed": self.seed, "updated_at": updated_at} + self.state = {"seed": self.seed, "updated_at": updated_at, "loop_offset": loop_offset} diff --git a/docs/integrations/sources/faker.md b/docs/integrations/sources/faker.md index e7d8e3487814..39a58897e124 100644 --- a/docs/integrations/sources/faker.md +++ b/docs/integrations/sources/faker.md @@ -2,11 
+2,13 @@ ## Sync overview -The Sample Data (Faker) source generates sample data using the python [`mimesis`](https://mimesis.name/en/master/) package. +The Sample Data (Faker) source generates sample data using the python +[`mimesis`](https://mimesis.name/en/master/) package. ### Output schema -This source will generate an "e-commerce-like" dataset with users, products, and purchases. Here's what is produced at a Postgres destination connected to this source: +This source will generate an "e-commerce-like" dataset with users, products, and purchases. Here's +what is produced at a Postgres destination connected to this source: ```sql CREATE TABLE "public"."users" ( @@ -84,9 +86,12 @@ CREATE TABLE "public"."purchases" ( | Incremental Sync | Yes | | | Namespaces | No | | -Of note, if you choose `Incremental Sync`, state will be maintained between syncs, and once you hit `count` records, no new records will be added. +Of note, if you choose `Incremental Sync`, state will be maintained between syncs, and once you hit +`count` records, no new records will be added. -You can choose a specific `seed` (integer) as an option for this connector which will guarantee that the same fake records are generated each time. Otherwise, random data will be created on each subsequent sync. +You can choose a specific `seed` (integer) as an option for this connector which will guarantee that +the same fake records are generated each time. Otherwise, random data will be created on each +subsequent sync. ### Requirements @@ -95,8 +100,9 @@ None! ## Changelog | Version | Date | Pull Request | Subject | -|:--------|:-----------|:----------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------| -| 5.0.1 | 2023-01-08 | [34033](https://github.com/airbytehq/airbyte/pull/34033) | Add standard entrypoints for usage with AirbyteLib | +| :------ | :--------- | :-------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------- | +| 5.0.2 | 2024-01-17 | [34344](https://github.com/airbytehq/airbyte/pull/34344) | Ensure unique state messages | +| 5.0.1 | 2023-01-08 | [34033](https://github.com/airbytehq/airbyte/pull/34033) | Add standard entrypoints for usage with AirbyteLib | | 5.0.0 | 2023-08-08 | [29213](https://github.com/airbytehq/airbyte/pull/29213) | Change all `*id` fields and `products.year` to be integer | | 4.0.0 | 2023-07-19 | [28485](https://github.com/airbytehq/airbyte/pull/28485) | Bump to test publication | | 3.0.2 | 2023-07-07 | [27807](https://github.com/airbytehq/airbyte/pull/28060) | Bump to test publication |