From 21fe9c70dbf44b0bebbd766d09f2f57da0f86040 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 2 Jan 2024 13:41:18 -0800 Subject: [PATCH] wip --- .../typing_deduping/JdbcSqlGenerator.java | 17 ++-- .../typing_deduping/MysqlSqlGenerator.java | 86 +++++++++++++++++-- .../MysqlSqlGeneratorIntegrationTest.java | 11 ++- ...orchange_expectedrecords_dedup_final.jsonl | 3 + ...rsorchange_expectedrecords_dedup_raw.jsonl | 4 + .../sync1_expectedrecords_dedup_final.jsonl | 4 + .../sync1_expectedrecords_dedup_final2.jsonl | 1 + ...sync1_expectedrecords_nondedup_final.jsonl | 5 ++ .../dat/sync1_expectedrecords_raw.jsonl | 5 ++ .../dat/sync1_expectedrecords_raw2.jsonl | 1 + ...ectedrecords_incremental_dedup_final.jsonl | 3 + ...xpectedrecords_incremental_dedup_raw.jsonl | 7 ++ ...ctedrecords_fullrefresh_append_final.jsonl | 8 ++ ...drecords_fullrefresh_overwrite_final.jsonl | 3 + ...tedrecords_fullrefresh_overwrite_raw.jsonl | 3 + ...ectedrecords_incremental_dedup_final.jsonl | 3 + ...ctedrecords_incremental_dedup_final2.jsonl | 1 + .../dat/sync2_expectedrecords_raw.jsonl | 9 ++ .../dat/sync2_expectedrecords_raw2.jsonl | 2 + .../alltypes_expectedrecords_final.jsonl | 10 +++ .../alltypes_expectedrecords_raw.jsonl | 5 ++ ...crementaldedup_expectedrecords_final.jsonl | 2 + ...incrementaldedup_expectedrecords_raw.jsonl | 3 + ...ypes_in_string_expectedrecords_final.jsonl | 5 ++ ..._types_in_string_expectedrecords_raw.jsonl | 5 ++ .../nocolumns_expectedrecords_final.jsonl | 1 + .../nocolumns_expectedrecords_raw.jsonl | 1 + ...servedkeywords_expectedrecords_final.jsonl | 1 + ...mestampformats_expectedrecords_final.jsonl | 16 ++++ ...irdcolumnnames_expectedrecords_final.jsonl | 9 ++ ...weirdcolumnnames_expectedrecords_raw.jsonl | 1 + 31 files changed, 218 insertions(+), 17 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl create mode 100644 
airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl create mode 100644 
airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl create mode 100644 
airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl diff --git a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java index 81047871d13d..3595bb9081bb 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java +++ b/airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/java/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcSqlGenerator.java @@ -55,6 +55,8 @@ import org.jooq.DSLContext; import org.jooq.DataType; import org.jooq.Field; +import org.jooq.InsertOnDuplicateStep; +import org.jooq.InsertReturningStep; import org.jooq.InsertValuesStepN; import org.jooq.Name; import org.jooq.Record; @@ -363,7 +365,7 @@ private Sql insertAndDeleteTransaction(final StreamConfig streamConfig, select(asterisk(), rowNumber).from(rawTableRowsWithCast)); // Used for append-dedupe mode. - final String insertStmtWithDedupe = + final String insertStmtWithDedupe = mutateInsertStatement( insertIntoFinalTable(finalSchema, finalTable, streamConfig.columns(), getFinalTableMetaColumns(true)) .select(with(rawTableRowsWithCast) .with(filteredRows) @@ -371,15 +373,16 @@ private Sql insertAndDeleteTransaction(final StreamConfig streamConfig, .from(filteredRows) .where(field(name(ROW_NUMBER_COLUMN_NAME), Integer.class).eq(1)) // Can refer by CTE.field but no use since we don't strongly type them. ) - .getSQL(ParamType.INLINED); + ).getSQL(ParamType.INLINED); // Used for append and overwrite modes. 
- final String insertStmt = + final String insertStmt = mutateInsertStatement( insertIntoFinalTable(finalSchema, finalTable, streamConfig.columns(), getFinalTableMetaColumns(true)) .select(with(rawTableRowsWithCast) .select(finalTableFields) - .from(rawTableRowsWithCast)) - .getSQL(ParamType.INLINED); + .from(rawTableRowsWithCast) + ) + ).getSQL(ParamType.INLINED); final String deleteStmt = deleteFromFinalTable(finalSchema, finalTable, streamConfig.primaryKey(), streamConfig.cursor()); final String deleteCdcDeletesStmt = streamConfig.columns().containsKey(cdcDeletedAtColumn) ? deleteFromFinalTableCdcDeletes(finalSchema, finalTable) : ""; @@ -492,4 +495,8 @@ protected Field currentTimestamp() { return DSL.currentTimestamp(); } + protected InsertReturningStep mutateInsertStatement(final InsertOnDuplicateStep insert) { + return insert; + } + } diff --git a/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGenerator.java b/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGenerator.java index a39df352b172..74808646e370 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGenerator.java +++ b/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGenerator.java @@ -2,10 +2,12 @@ import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_META; import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA; +import static org.jooq.impl.DSL.case_; import static org.jooq.impl.DSL.cast; import static org.jooq.impl.DSL.field; import static org.jooq.impl.DSL.function; import static org.jooq.impl.DSL.name; +import static org.jooq.impl.DSL.quotedName; import static org.jooq.impl.DSL.trueCondition; import static 
org.jooq.impl.DSL.val; @@ -14,29 +16,38 @@ import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; +import io.airbyte.integrations.base.destination.typing_deduping.Array; import io.airbyte.integrations.base.destination.typing_deduping.ColumnId; import io.airbyte.integrations.base.destination.typing_deduping.Sql; import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; +import io.airbyte.integrations.base.destination.typing_deduping.Struct; import java.util.LinkedHashMap; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.lang3.NotImplementedException; +import org.jooq.CaseConditionStep; import org.jooq.Condition; import org.jooq.DataType; import org.jooq.Field; +import org.jooq.InsertOnDuplicateStep; +import org.jooq.InsertReturningStep; +import org.jooq.Param; +import org.jooq.Record; import org.jooq.SQLDialect; import org.jooq.impl.DefaultDataType; -import org.jooq.impl.DSL; import org.jooq.impl.SQLDataType; public class MysqlSqlGenerator extends JdbcSqlGenerator { + + public static final DefaultDataType JSON_TYPE = new DefaultDataType<>(null, Object.class, "json"); + public MysqlSqlGenerator(final NamingConventionTransformer namingResolver) { super(namingResolver); } private DataType getJsonType() { - return new DefaultDataType<>(null, String.class, "json"); + return JSON_TYPE; } @Override @@ -79,21 +90,66 @@ protected SQLDialect getDialect() { } @Override - protected List> extractRawDataFields(final LinkedHashMap columns) { + protected List> extractRawDataFields(final LinkedHashMap columns, final boolean useExpensiveSaferCasting) { return columns .entrySet() .stream() - .map(column -> castedField( - // TODO escape jsonpath - function("JSON_EXTRACT", getJsonType(), 
field(name(COLUMN_NAME_DATA)), val("$." + column.getKey().originalName())), - column.getValue(), - column.getKey().name())) + .map(column -> { + final String jsonExtractFunction; + final AirbyteType type = column.getValue(); + final boolean isStruct = type instanceof Struct; + final boolean isArray = type instanceof Array; + if (type == AirbyteProtocolType.UNKNOWN || isStruct || isArray) { + // UNKNOWN should use json_extract to retain the exact json value + jsonExtractFunction = "JSON_EXTRACT"; + } else { + // And primitive types should just use json_value, to (a) strip quotes from strings, and + // (b) cast json null to sql null. + jsonExtractFunction = "JSON_VALUE"; + } + + final Field extractedValue = function(jsonExtractFunction, getJsonType(), field(name(COLUMN_NAME_DATA)), jsonPath(column.getKey())); + if (isStruct) { + return case_() + .when( + extractedValue.isNull() + .or(function("JSON_TYPE", String.class, extractedValue).ne("OBJECT")), + val((Object) null) + ).else_(extractedValue) + .as(quotedName(column.getKey().name())); + } else if (isArray) { + return case_() + .when( + extractedValue.isNull() + .or(function("JSON_TYPE", String.class, extractedValue).ne("ARRAY")), + val((Object) null) + ).else_(extractedValue) + .as(quotedName(column.getKey().name())); + } else { + final Field castedValue = castedField(extractedValue, type, column.getKey().name()); + if (!(type instanceof final AirbyteProtocolType primitive)) { + return castedValue; + } + return switch (primitive) { + // These types are just casting to strings, so we need to use regex to validate their format + case TIME_WITH_TIMEZONE -> case_() + .when(castedValue.notLikeRegex("^[0-9]{2}:[0-9]{2}:[0-9]{2}([.][0-9]+)?([-+][0-9]{2}:[0-9]{2}|Z)$"), val((Object) null)) + .else_(castedValue) + .as(quotedName(column.getKey().name())); + case TIMESTAMP_WITH_TIMEZONE -> case_() + .when(castedValue.notLikeRegex("^[0-9]+-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}([.][0-9]+)?([-+][0-9]{2}:[0-9]{2}|Z)$"), 
val((Object) null)) + .else_(castedValue) + .as(quotedName(column.getKey().name())); + default -> castedValue; + }; + } + }) .collect(Collectors.toList()); } @Override protected Field buildAirbyteMetaColumn(final LinkedHashMap columns) { - // TODO + // TODO Intentionally unimplemented for initial DV2 release return cast(val("{}"), getJsonType()).as(COLUMN_NAME_AB_META); } @@ -123,4 +179,16 @@ public boolean existingSchemaMatchesStreamConfig(final StreamConfig stream, fina protected String beginTransaction() { return "START TRANSACTION"; } + + @Override + protected InsertReturningStep mutateInsertStatement(final InsertOnDuplicateStep insert) { + // this turns the insert into an `INSERT IGNORE ...` + // We're actually using this to ignore CAST() errors, rather than duplicate key errors. + return insert.onDuplicateKeyIgnore(); + } + + private static Param jsonPath(final ColumnId column) { + // TODO escape jsonpath + return val("$." + column.originalName()); + } } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGeneratorIntegrationTest.java index 5ccfa1411825..0ebb4b9ab8a4 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/typing_deduping/MysqlSqlGeneratorIntegrationTest.java @@ -1,6 +1,9 @@ package io.airbyte.integrations.destination.mysql.typing_deduping; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.db.jdbc.AbstractJdbcCompatibleSourceOperations; +import 
io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition; @@ -10,11 +13,13 @@ import io.airbyte.integrations.destination.mysql.MySQLDestination; import io.airbyte.integrations.destination.mysql.MySQLDestinationAcceptanceTest; import io.airbyte.integrations.destination.mysql.MySQLNameTransformer; -import java.util.List; +import io.airbyte.protocol.models.JsonSchemaType; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; import javax.sql.DataSource; import org.jooq.DataType; import org.jooq.Field; -import org.jooq.Name; import org.jooq.SQLDialect; import org.jooq.impl.DSL; import org.jooq.impl.DefaultDataType; @@ -30,7 +35,7 @@ public class MysqlSqlGeneratorIntegrationTest extends JdbcSqlGeneratorIntegratio private static JdbcDatabase database; @BeforeAll - public static void setupMysql() { + public static void setupMysql() throws Exception { testContainer = new MySQLContainer<>("mysql:8.0"); testContainer.start(); MySQLDestinationAcceptanceTest.configureTestContainer(testContainer); diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl new file mode 100644 index 000000000000..9f11b2293a95 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "old_cursor": 1, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, 
"id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl new file mode 100644 index 000000000000..7f75f0f804e2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_cursorchange_expectedrecords_dedup_raw.jsonl @@ -0,0 +1,4 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl new file mode 100644 index 000000000000..c805113dc6c2 --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final.jsonl @@ -0,0 +1,4 @@ +// Keep the Alice record with more recent updated_at +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00.000000Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00.000000Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl new file mode 100644 index 000000000000..b2bf47df66c1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_dedup_final2.jsonl @@ -0,0 +1 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00.000000Z", "name": "Someone completely different"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl new file mode 100644 index 000000000000..8aa852183061 --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_nondedup_final.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00.000000Z", "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00.000000Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00.000000Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +// Invalid columns are nulled out (i.e. SQL null, not JSON null) +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..80fac124d28d --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": 
{"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +// Invalid data is still allowed in the raw table. +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl new file mode 100644 index 000000000000..b489accda1bb --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync1_expectedrecords_raw2.jsonl @@ -0,0 +1 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl new file mode 100644 index 000000000000..c26d4a49aacd --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +// Charlie wasn't reemitted with 
updated_at, so it still has a null cursor +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl new file mode 100644 index 000000000000..03f28e155af5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_cursorchange_expectedrecords_incremental_dedup_raw.jsonl @@ -0,0 +1,7 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 0, "_ab_cdc_deleted_at": null, "name" :"Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "old_cursor": 1, "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "old_cursor": 2, "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "old_cursor": 3, "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", 
"address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl new file mode 100644 index 000000000000..6e9258bab255 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_append_final.jsonl @@ -0,0 +1,8 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00.000000Z", "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00.000000Z", "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00.000000Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} + +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": 
"2000-01-02T00:00:00.000000Z", "name": "Bob", "address": {"city": "New York", "state": "NY"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00.000000Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00.000000Z"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl new file mode 100644 index 000000000000..9d1f1499469f --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Bob", "address": {"city": "New York", "state": "NY"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00.000000Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00.000000Z"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl new file mode 100644 index 000000000000..33bc3280be27 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_fullrefresh_overwrite_raw.jsonl @@ -0,0 +1,3 
@@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl new file mode 100644 index 000000000000..13c59b2f9912 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00.000000Z", "name": "Alice", "address": {"city": "Seattle", "state": "WA"}} +// Delete Bob, keep Charlie +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_meta": {"errors":["Problem with `age`", "Problem with `registration_date`"]}, "id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00.000000Z", "name": "Charlie"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl new file mode 100644 index 000000000000..53c304c89d31 --- /dev/null 
+++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_incremental_dedup_final2.jsonl @@ -0,0 +1 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_meta":{"errors":[]}, "id1": 1, "id2": 200, "updated_at": "2001-01-02T00:00:00.000000Z", "name": "Someone completely different v2"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..32a7e57b1c14 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw.jsonl @@ -0,0 +1,9 @@ +// We keep the records from the first sync +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "San Francisco", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-01T00:01:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": {"city": "Los Angeles", "state": "CA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-01T00:02:00Z", "name": "Bob", "address": {"city": "Boston", "state": "MA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 2, "id2": 200, "updated_at": "2000-01-01T00:03:00Z", "name": "Charlie", "age": "this is not an integer", "registration_date": "this is not a date"}} +// And append the records from the second sync +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Alice", "address": 
{"city": "Seattle", "state": "WA"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Bob", "address": {"city": "New York", "state": "NY"}}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 201, "updated_at": "2000-01-02T00:01:00Z", "_ab_cdc_deleted_at": "1970-01-01T00:00:00Z"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl new file mode 100644 index 000000000000..88b8ee7746c1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/dat/sync2_expectedrecords_raw2.jsonl @@ -0,0 +1,2 @@ +{"_airbyte_extracted_at": "1970-01-01T00:00:01.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-01T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different"}} +{"_airbyte_extracted_at": "1970-01-01T00:00:02.000000Z", "_airbyte_data": {"id1": 1, "id2": 200, "updated_at": "2001-01-02T00:00:00Z", "_ab_cdc_deleted_at": null, "name": "Someone completely different v2"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl new file mode 100644 index 000000000000..ea5d7fcadfd1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_final.jsonl @@ -0,0 +1,10 @@ +// Note that our JDBC interface is returning nested json as strings. 
+// This is because we're not passing a JdbcCompatibleSourceOperations to the DefaultJdbcDatabase in our integration test. +{"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": "[\"foo\"]", "struct": "{\"foo\": \"bar\"}", "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": "{}", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_meta": "{}"} +{"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "unknown": "null", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_meta": "{}"} +{"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_meta": "{}"} +// MySQL cast errors use non-null default values +{"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "boolean": false, "integer": 0, "number": 0, "unknown": "null", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_meta": "{}"} +// Note that for numbers where we parse the value to JSON (struct, array, unknown) we lose precision. +// But for numbers where we create a NUMBER column, we do not lose precision (see the `number` column). 
+{"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "number": 67.174118, "struct": "{\"nested_number\": 67.174118}", "array": "[67.174118]", "unknown": "67.174118", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_meta": "{}"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..9f89442b914f --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/alltypes_expectedrecords_raw.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}} +{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", 
"_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} +{"_airbyte_raw_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "number": 67.174118, "struct": {"nested_number": 67.174118}, "array": [67.174118], "unknown": 67.174118}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl new file mode 100644 index 000000000000..5842f7b37e42 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_final.jsonl @@ -0,0 +1,2 @@ +{"_airbyte_raw_id": "80c99b54-54b4-43bd-b51b-1f67dafa2c52", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00.000000Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84} +{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": ["Problem with `integer`"]}, "id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00.000000Z", "string": "Bob"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl new 
file mode 100644 index 000000000000..63569975abc2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/incrementaldedup_expectedrecords_raw.jsonl @@ -0,0 +1,3 @@ +{"_airbyte_raw_id": "d7b81af0-01da-4846-a650-cc398986bc99", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}} +{"_airbyte_raw_id": "80c99b54-54b4-43bd-b51b-1f67dafa2c52", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}} +{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl new file mode 100644 index 000000000000..52a9c10fcc47 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_final.jsonl @@ -0,0 +1,5 @@ +{"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "[\"I\",\"am\",\"an\",\"array\"]", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": 
"2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "{\"I\":\"am\",\"an\":\"object\"}", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "true", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "3.14", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} +{"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00.000000Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "I am a valid json string", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}, "_airbyte_extracted_at": 
"2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..5c10203c7837 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/json_types_in_string_expectedrecords_raw.jsonl @@ -0,0 +1,5 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": ["I", "am", "an", "array"], "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": {"I": "am", "an": "object"}, "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": true, "number": 42.1, "integer": 42, "boolean": true, 
"timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": 3.14, "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} +{"_airbyte_raw_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "I am a valid json string", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl new file mode 100644 index 000000000000..4ecd95d83b63 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_final.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}} diff --git 
a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..cd7c03aba677 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/nocolumns_expectedrecords_raw.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {}} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl new file mode 100644 index 000000000000..b34ad054ab33 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/reservedkeywords_expectedrecords_final.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id":"b2e0efc4-38a8-47ba-970c-8103f09f08d5","_airbyte_extracted_at":"2023-01-01T00:00:00.000000Z","_airbyte_meta":{"errors":[]}, "current_date": "foo", "join": "bar"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl new file mode 100644 index 000000000000..5a4bfc33d906 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/timestampformats_expectedrecords_final.jsonl @@ -0,0 +1,16 @@ +// https://docs.aws.amazon.com/redshift/latest/dg/r_Datetime_types.html#r_Datetime_types-timetz +// TIME, TIMETZ, TIMESTAMP, TIMESTAMPTZ values are UTC 
in user tables. +// Note that redshift stores precision to microseconds. Java deserialization in tests preserves them only for non-zero values +// except for timestamp with time zone where Z is required at end for even zero values +{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T12:34:56.000000Z", "time_with_timezone": "12:34:56Z"} +{"_airbyte_raw_id": "05028c5f-7813-4e9c-bd4b-387d1f8ba435", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T20:34:56.000000Z", "time_with_timezone": "20:34:56Z"} +{"_airbyte_raw_id": "95dfb0c6-6a67-4ba0-9935-643bebc90437", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T20:34:56.000000Z", "time_with_timezone": "20:34:56Z"} +{"_airbyte_raw_id": "f3d8abe2-bb0f-4caf-8ddc-0641df02f3a9", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T20:34:56.000000Z", "time_with_timezone": "20:34:56Z"} +{"_airbyte_raw_id": "a81ed40a-2a49-488d-9714-d53e8b052968", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T04:34:56.000000Z", "time_with_timezone": "04:34:56Z"} +{"_airbyte_raw_id": "c07763a0-89e6-4cb7-b7d0-7a34a7c9918a", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T04:34:56.000000Z", "time_with_timezone": "04:34:56Z"} +{"_airbyte_raw_id": "358d3b52-50ab-4e06-9094-039386f9bf0d", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T04:34:56.000000Z", "time_with_timezone": "04:34:56Z"} +{"_airbyte_raw_id": "db8200ac-b2b9-4b95-a053-8a0343042751", 
"_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_with_timezone": "2023-01-23T12:34:56.123000Z", "time_with_timezone": "12:34:56.123Z"} + +{"_airbyte_raw_id": "10ce5d93-6923-4217-a46f-103833837038", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_without_timezone": "2023-01-23T12:34:56", "time_without_timezone": "12:34:56", "date": "2023-01-23"} +// Bigquery returns 6 decimal places if there are any decimal places... but not for timestamp_with_timezone +{"_airbyte_raw_id": "a7a6e176-7464-4a0b-b55c-b4f936e8d5a1", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "timestamp_without_timezone": "2023-01-23T12:34:56.123", "time_without_timezone": "12:34:56.123"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl new file mode 100644 index 000000000000..adfbd06d6a55 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_final.jsonl @@ -0,0 +1,9 @@ +// column renamings: +// * $starts_with_dollar_sign -> _starts_with_dollar_sign +// * includes"doublequote -> includes_doublequote +// * includes'singlequote -> includes_singlequote +// * includes`backtick -> includes_backtick +// * includes$$doubledollar -> includes__doubledollar +// * includes.period -> includes_period +// * endswithbackslash\ -> endswithbackslash_ +{"_airbyte_raw_id": "7e7330a1-42fb-41ec-a955-52f18bd61964", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_meta": {"errors": []}, "id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00.000000Z", "_starts_with_dollar_sign": "foo", "includes_doublequote": "foo", 
"includes_singlequote": "foo", "includes_backtick": "foo", "includes_period": "foo", "includes__doubledollar": "foo", "endswithbackslash_": "foo"} diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl new file mode 100644 index 000000000000..2b602082a349 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/resources/sqlgenerator/weirdcolumnnames_expectedrecords_raw.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "7e7330a1-42fb-41ec-a955-52f18bd61964", "_airbyte_extracted_at": "2023-01-01T00:00:00.000000Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00Z", "$starts_with_dollar_sign": "foo", "includes\"doublequote": "foo", "includes'singlequote": "foo", "includes`backtick": "foo", "includes.period": "foo", "includes$$doubledollar": "foo", "endswithbackslash\\": "foo"}}