From 210be7068109cdc045560f3af2eb3cf44e205fe8 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 11 Jun 2024 22:05:57 +0200 Subject: [PATCH] fixes table builder tests --- dlt/common/typing.py | 10 ++--- tests/.dlt/config.toml | 4 +- tests/common/cases/normalizers/__init__.py | 0 tests/common/cases/normalizers/title_case.py | 14 +++++++ .../bigquery/test_bigquery_table_builder.py | 17 +++++--- .../test_clickhouse_table_builder.py | 3 ++ tests/load/dremio/test_dremio_client.py | 3 ++ .../load/duckdb/test_duckdb_table_builder.py | 3 ++ ...entials.py => test_mssql_configuration.py} | 33 ++++++++++++++- tests/load/mssql/test_mssql_table_builder.py | 12 +++--- .../postgres/test_postgres_table_builder.py | 42 +++++++++++++++++-- tests/load/redshift/test_redshift_client.py | 35 +++++++++++++++- .../redshift/test_redshift_table_builder.py | 2 + .../snowflake/test_snowflake_table_builder.py | 3 +- .../synapse/test_synapse_configuration.py | 40 +++++++++++++++++- .../synapse/test_synapse_table_builder.py | 13 +++--- tests/load/test_sql_client.py | 2 +- tests/load/weaviate/test_pipeline.py | 6 ++- tests/load/weaviate/test_weaviate_client.py | 14 +++++-- tests/pipeline/test_dlt_versions.py | 13 ++++-- 20 files changed, 230 insertions(+), 39 deletions(-) create mode 100644 tests/common/cases/normalizers/__init__.py create mode 100644 tests/common/cases/normalizers/title_case.py rename tests/load/mssql/{test_mssql_credentials.py => test_mssql_configuration.py} (77%) diff --git a/dlt/common/typing.py b/dlt/common/typing.py index 15fe3f0649..cd535cdcd2 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -377,12 +377,12 @@ def get_generic_type_argument_from_instance( Type[Any]: type argument or Any if not known """ orig_param_type = Any - # instance of class deriving from generic - if bases_ := get_original_bases(instance): - cls_ = bases_[0] - else: + if cls_ := getattr(instance, "__orig_class__", None): # instance of generic class - cls_ = getattr(instance, "__orig_class__", None) + pass + elif bases_ := get_original_bases(instance): + # instance of class deriving from generic + cls_ = bases_[0] if cls_: orig_param_type = get_args(cls_)[0] if orig_param_type is Any and sample_value is not None: diff --git a/tests/.dlt/config.toml b/tests/.dlt/config.toml index ba86edf417..ec0df4fc15 100644 --- a/tests/.dlt/config.toml +++ b/tests/.dlt/config.toml @@ -1,5 +1,5 @@ -[runtime] -sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" +# [runtime] +# sentry_dsn="https://6f6f7b6f8e0f458a89be4187603b55fe@o1061158.ingest.sentry.io/4504819859914752" [tests] bucket_url_gs="gs://ci-test-bucket" diff --git a/tests/common/cases/normalizers/__init__.py b/tests/common/cases/normalizers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/common/cases/normalizers/title_case.py b/tests/common/cases/normalizers/title_case.py new file mode 100644 index 0000000000..41eb96fcda --- /dev/null +++ b/tests/common/cases/normalizers/title_case.py @@ -0,0 +1,14 @@ +from dlt.common.normalizers.naming.direct import NamingConvention as DirectNamingConvention + + +class NamingConvention(DirectNamingConvention): + """Test case sensitive naming that capitalizes first and last letter and leaves the rest intact""" + + PATH_SEPARATOR = "__" + + def normalize_identifier(self, identifier: str) -> str: + # keep prefix + if identifier == "_dlt": + return "_dlt" + identifier = super().normalize_identifier(identifier) + return identifier[0].upper() + 
identifier[1:-1] + identifier[-1].upper() diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py index df564192dc..a38b1746f4 100644 --- a/tests/load/bigquery/test_bigquery_table_builder.py +++ b/tests/load/bigquery/test_bigquery_table_builder.py @@ -21,11 +21,15 @@ from dlt.common.schema import Schema from dlt.common.utils import custom_environ from dlt.common.utils import uniq_id + from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate +from dlt.destinations.impl.bigquery import capabilities from dlt.destinations.impl.bigquery.bigquery import BigQueryClient from dlt.destinations.impl.bigquery.bigquery_adapter import bigquery_adapter from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration + from dlt.extract import DltResource + from tests.load.pipeline.utils import ( destinations_configs, DestinationTestConfiguration, @@ -63,6 +67,7 @@ def gcp_client(empty_schema: Schema) -> BigQueryClient: BigQueryClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name=f"test_{uniq_id()}" ), + capabilities(), ) @@ -89,9 +94,9 @@ def test_create_table(gcp_client: BigQueryClient) -> None: sqlfluff.parse(sql, dialect="bigquery") assert sql.startswith("CREATE TABLE") assert "event_test_table" in sql - assert "`col1` INTEGER NOT NULL" in sql + assert "`col1` INT64 NOT NULL" in sql assert "`col2` FLOAT64 NOT NULL" in sql - assert "`col3` BOOLEAN NOT NULL" in sql + assert "`col3` BOOL NOT NULL" in sql assert "`col4` TIMESTAMP NOT NULL" in sql assert "`col5` STRING " in sql assert "`col6` NUMERIC(38,9) NOT NULL" in sql @@ -100,7 +105,7 @@ def test_create_table(gcp_client: BigQueryClient) -> None: assert "`col9` JSON NOT NULL" in sql assert "`col10` DATE" in sql assert "`col11` TIME" in sql - assert "`col1_precision` INTEGER NOT NULL" in sql + assert "`col1_precision` INT64 NOT NULL" in sql assert "`col4_precision` TIMESTAMP NOT NULL" in sql assert "`col5_precision` STRING(25) " in sql assert "`col6_precision` NUMERIC(6,2) NOT NULL" in sql @@ -119,9 +124,9 @@ def test_alter_table(gcp_client: BigQueryClient) -> None: assert sql.startswith("ALTER TABLE") assert sql.count("ALTER TABLE") == 1 assert "event_test_table" in sql - assert "ADD COLUMN `col1` INTEGER NOT NULL" in sql + assert "ADD COLUMN `col1` INT64 NOT NULL" in sql assert "ADD COLUMN `col2` FLOAT64 NOT NULL" in sql - assert "ADD COLUMN `col3` BOOLEAN NOT NULL" in sql + assert "ADD COLUMN `col3` BOOL NOT NULL" in sql assert "ADD COLUMN `col4` TIMESTAMP NOT NULL" in sql assert "ADD COLUMN `col5` STRING" in sql assert "ADD COLUMN `col6` NUMERIC(38,9) NOT NULL" in sql @@ -130,7 +135,7 @@ def test_alter_table(gcp_client: BigQueryClient) -> None: assert "ADD COLUMN `col9` JSON NOT NULL" in sql assert "ADD COLUMN `col10` DATE" in sql assert "ADD COLUMN `col11` TIME" in sql - assert "ADD COLUMN `col1_precision` INTEGER NOT NULL" in sql + assert "ADD COLUMN `col1_precision` INT64 NOT NULL" in sql assert "ADD COLUMN `col4_precision` TIMESTAMP NOT NULL" in sql assert "ADD COLUMN `col5_precision` STRING(25)" in sql assert "ADD COLUMN `col6_precision` NUMERIC(6,2) NOT NULL" in sql diff --git a/tests/load/clickhouse/test_clickhouse_table_builder.py b/tests/load/clickhouse/test_clickhouse_table_builder.py index fd3bf50907..653ca33c38 100644 --- a/tests/load/clickhouse/test_clickhouse_table_builder.py +++ b/tests/load/clickhouse/test_clickhouse_table_builder.py @@ -6,6 +6,8 @@ from dlt.common.schema import Schema from dlt.common.utils import 
custom_environ, digest128 from dlt.common.utils import uniq_id + +from dlt.destinations.impl.clickhouse import capabilities from dlt.destinations.impl.clickhouse.clickhouse import ClickHouseClient from dlt.destinations.impl.clickhouse.configuration import ( ClickHouseCredentials, @@ -21,6 +23,7 @@ def clickhouse_client(empty_schema: Schema) -> ClickHouseClient: return ClickHouseClient( empty_schema, ClickHouseClientConfiguration(credentials=creds)._bind_dataset_name(f"test_{uniq_id()}"), + capabilities(), ) diff --git a/tests/load/dremio/test_dremio_client.py b/tests/load/dremio/test_dremio_client.py index d0002dc343..a690472f97 100644 --- a/tests/load/dremio/test_dremio_client.py +++ b/tests/load/dremio/test_dremio_client.py @@ -1,6 +1,8 @@ import pytest from dlt.common.schema import TColumnSchema, Schema + +from dlt.destinations.impl.dremio import capabilities from dlt.destinations.impl.dremio.configuration import DremioClientConfiguration, DremioCredentials from dlt.destinations.impl.dremio.dremio import DremioClient from tests.load.utils import empty_schema @@ -15,6 +17,7 @@ def dremio_client(empty_schema: Schema) -> DremioClient: DremioClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name="test_dataset" ), + capabilities(), ) diff --git a/tests/load/duckdb/test_duckdb_table_builder.py b/tests/load/duckdb/test_duckdb_table_builder.py index 545f182ece..b492fa1747 100644 --- a/tests/load/duckdb/test_duckdb_table_builder.py +++ b/tests/load/duckdb/test_duckdb_table_builder.py @@ -5,6 +5,7 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema +from dlt.destinations.impl.duckdb import capabilities from dlt.destinations.impl.duckdb.duck import DuckDbClient from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration @@ -25,6 +26,7 @@ def client(empty_schema: Schema) -> DuckDbClient: return DuckDbClient( empty_schema, DuckDbClientConfiguration()._bind_dataset_name(dataset_name="test_" + uniq_id()), + capabilities(), ) @@ -122,6 +124,7 @@ def test_create_table_with_hints(client: DuckDbClient) -> None: DuckDbClientConfiguration(create_indexes=True)._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql) diff --git a/tests/load/mssql/test_mssql_credentials.py b/tests/load/mssql/test_mssql_configuration.py similarity index 77% rename from tests/load/mssql/test_mssql_credentials.py rename to tests/load/mssql/test_mssql_configuration.py index 7d49196531..75af101e23 100644 --- a/tests/load/mssql/test_mssql_credentials.py +++ b/tests/load/mssql/test_mssql_configuration.py @@ -1,15 +1,46 @@ +import os import pyodbc import pytest from dlt.common.configuration import resolve_configuration, ConfigFieldMissingException from dlt.common.exceptions import SystemConfigurationException +from dlt.common.schema import Schema -from dlt.destinations.impl.mssql.configuration import MsSqlCredentials +from dlt.destinations import mssql +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration # mark all tests as essential, do not remove pytestmark = pytest.mark.essential +def test_mssql_factory() -> None: + schema = Schema("schema") + dest = mssql() + client = dest.client(schema, MsSqlClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is False + assert client.config.has_case_sensitive_identifiers is False + assert client.capabilities.has_case_sensitive_identifiers 
is False + assert client.capabilities.casefold_identifier is str + + # set args explicitly + dest = mssql(has_case_sensitive_identifiers=True, create_indexes=True) + client = dest.client(schema, MsSqlClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + # set args via config + os.environ["DESTINATION__CREATE_INDEXES"] = "True" + os.environ["DESTINATION__HAS_CASE_SENSITIVE_IDENTIFIERS"] = "True" + dest = mssql() + client = dest.client(schema, MsSqlClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + def test_mssql_credentials_defaults() -> None: creds = MsSqlCredentials() assert creds.port == 1433 diff --git a/tests/load/mssql/test_mssql_table_builder.py b/tests/load/mssql/test_mssql_table_builder.py index f7a87c14ee..79126572fb 100644 --- a/tests/load/mssql/test_mssql_table_builder.py +++ b/tests/load/mssql/test_mssql_table_builder.py @@ -6,7 +6,8 @@ pytest.importorskip("dlt.destinations.impl.mssql.mssql", reason="MSSQL ODBC driver not installed") -from dlt.destinations.impl.mssql.mssql import MsSqlClient +from dlt.destinations.impl.mssql import capabilities +from dlt.destinations.impl.mssql.mssql import MsSqlJobClient from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials from tests.load.utils import TABLE_UPDATE, empty_schema @@ -16,17 +17,18 @@ @pytest.fixture -def client(empty_schema: Schema) -> MsSqlClient: +def client(empty_schema: Schema) -> MsSqlJobClient: # return client without opening connection - return MsSqlClient( + return MsSqlJobClient( empty_schema, MsSqlClientConfiguration(credentials=MsSqlCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) -def test_create_table(client: MsSqlClient) -> None: +def test_create_table(client: MsSqlJobClient) -> None: # non existing table sql = client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)[0] sqlfluff.parse(sql, dialect="tsql") @@ -50,7 +52,7 @@ def test_create_table(client: MsSqlClient) -> None: assert '"col11_precision" time(3) NOT NULL' in sql -def test_alter_table(client: MsSqlClient) -> None: +def test_alter_table(client: MsSqlJobClient) -> None: # existing table has no columns sql = client._get_table_update_sql("event_test_table", TABLE_UPDATE, True)[0] sqlfluff.parse(sql, dialect="tsql") diff --git a/tests/load/postgres/test_postgres_table_builder.py b/tests/load/postgres/test_postgres_table_builder.py index 7566b8afce..af8f96a907 100644 --- a/tests/load/postgres/test_postgres_table_builder.py +++ b/tests/load/postgres/test_postgres_table_builder.py @@ -4,8 +4,10 @@ from dlt.common.exceptions import TerminalValueError from dlt.common.utils import uniq_id -from dlt.common.schema import Schema +from dlt.common.schema import Schema, utils +from dlt.common.destination import Destination +from dlt.destinations.impl.postgres import capabilities from dlt.destinations.impl.postgres.postgres import PostgresClient from dlt.destinations.impl.postgres.configuration import ( PostgresClientConfiguration, @@ -25,12 +27,26 @@ @pytest.fixture def client(empty_schema: Schema) -> 
PostgresClient: + return create_client(empty_schema) + + +@pytest.fixture +def cs_client(empty_schema: Schema) -> PostgresClient: + # change normalizer to case sensitive + empty_schema._normalizers_config["names"] = "tests.common.cases.normalizers.title_case" + empty_schema.update_normalizers() + return create_client(empty_schema) + + +def create_client(empty_schema: Schema) -> PostgresClient: # return client without opening connection + config = PostgresClientConfiguration(credentials=PostgresCredentials())._bind_dataset_name( + dataset_name="test_" + uniq_id() + ) return PostgresClient( empty_schema, - PostgresClientConfiguration(credentials=PostgresCredentials())._bind_dataset_name( - dataset_name="test_" + uniq_id() - ), + config, + Destination.adjust_capabilities(capabilities(), config, empty_schema.naming), ) @@ -125,7 +141,25 @@ def test_create_table_with_hints(client: PostgresClient) -> None: create_indexes=False, credentials=PostgresCredentials(), )._bind_dataset_name(dataset_name="test_" + uniq_id()), + capabilities(), ) sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] sqlfluff.parse(sql, dialect="postgres") assert '"col2" double precision NOT NULL' in sql + + +def test_create_table_case_sensitive(cs_client: PostgresClient) -> None: + cs_client.schema.update_table( + utils.new_table("event_test_table", columns=deepcopy(TABLE_UPDATE)) + ) + sql = cs_client._get_table_update_sql( + "Event_test_tablE", + list(cs_client.schema.get_table_columns("Event_test_tablE").values()), + False, + )[0] + sqlfluff.parse(sql, dialect="postgres") + # everything capitalized + assert cs_client.sql_client.fully_qualified_dataset_name(escape=False)[0] == "T" # Test + # every line starts with "Col" + for line in sql.split("\n")[1:]: + assert line.startswith('"Col') diff --git a/tests/load/redshift/test_redshift_client.py b/tests/load/redshift/test_redshift_client.py index 03bb57c3b4..bb923df673 100644 --- a/tests/load/redshift/test_redshift_client.py +++ b/tests/load/redshift/test_redshift_client.py @@ -6,13 +6,18 @@ from dlt.common import json, pendulum from dlt.common.configuration.resolve import resolve_configuration +from dlt.common.schema.schema import Schema from dlt.common.schema.typing import VERSION_TABLE_NAME from dlt.common.storages import FileStorage from dlt.common.storages.schema_storage import SchemaStorage from dlt.common.utils import uniq_id from dlt.destinations.exceptions import DatabaseTerminalException -from dlt.destinations.impl.redshift.configuration import RedshiftCredentials +from dlt.destinations import redshift +from dlt.destinations.impl.redshift.configuration import ( + RedshiftCredentials, + RedshiftClientConfiguration, +) from dlt.destinations.impl.redshift.redshift import RedshiftClient, psycopg2 from tests.common.utils import COMMON_TEST_CASES_PATH @@ -42,6 +47,34 @@ def test_postgres_and_redshift_credentials_defaults() -> None: assert red_cred.port == 5439 +def test_redshift_factory() -> None: + schema = Schema("schema") + dest = redshift() + client = dest.client(schema, RedshiftClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.staging_iam_role is None + assert client.config.has_case_sensitive_identifiers is False + assert client.capabilities.has_case_sensitive_identifiers is False + assert client.capabilities.casefold_identifier is str.lower + + # set args explicitly + dest = redshift(has_case_sensitive_identifiers=True, staging_iam_role="LOADER") + client = dest.client(schema, 
RedshiftClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.staging_iam_role == "LOADER" + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + # set args via config + os.environ["DESTINATION__STAGING_IAM_ROLE"] = "LOADER" + os.environ["DESTINATION__HAS_CASE_SENSITIVE_IDENTIFIERS"] = "True" + dest = redshift() + client = dest.client(schema, RedshiftClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.staging_iam_role == "LOADER" + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + @skipifpypy def test_text_too_long(client: RedshiftClient, file_storage: FileStorage) -> None: caps = client.capabilities diff --git a/tests/load/redshift/test_redshift_table_builder.py b/tests/load/redshift/test_redshift_table_builder.py index 2427bc7cfe..b7557f8bc9 100644 --- a/tests/load/redshift/test_redshift_table_builder.py +++ b/tests/load/redshift/test_redshift_table_builder.py @@ -6,6 +6,7 @@ from dlt.common.schema import Schema from dlt.common.configuration import resolve_configuration +from dlt.destinations.impl.redshift import capabilities from dlt.destinations.impl.redshift.redshift import RedshiftClient from dlt.destinations.impl.redshift.configuration import ( RedshiftClientConfiguration, @@ -26,6 +27,7 @@ def client(empty_schema: Schema) -> RedshiftClient: RedshiftClientConfiguration(credentials=RedshiftCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) diff --git a/tests/load/snowflake/test_snowflake_table_builder.py b/tests/load/snowflake/test_snowflake_table_builder.py index bdbe888fb5..194b6bb6fb 100644 --- a/tests/load/snowflake/test_snowflake_table_builder.py +++ b/tests/load/snowflake/test_snowflake_table_builder.py @@ -5,12 +5,12 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema +from dlt.destinations.impl.snowflake import capabilities from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient from dlt.destinations.impl.snowflake.configuration import ( SnowflakeClientConfiguration, SnowflakeCredentials, ) -from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate from tests.load.utils import TABLE_UPDATE, empty_schema @@ -27,6 +27,7 @@ def snowflake_client(empty_schema: Schema) -> SnowflakeClient: SnowflakeClientConfiguration(credentials=creds)._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) diff --git a/tests/load/synapse/test_synapse_configuration.py b/tests/load/synapse/test_synapse_configuration.py index f366d87d09..8aaea03b0f 100644 --- a/tests/load/synapse/test_synapse_configuration.py +++ b/tests/load/synapse/test_synapse_configuration.py @@ -1,8 +1,11 @@ +import os import pytest from dlt.common.configuration import resolve_configuration from dlt.common.exceptions import SystemConfigurationException +from dlt.common.schema import Schema +from dlt.destinations import synapse from dlt.destinations.impl.synapse.configuration import ( SynapseClientConfiguration, SynapseCredentials, @@ -14,7 +17,42 @@ def test_synapse_configuration() -> None: # By default, unique indexes should not be created. 
- assert SynapseClientConfiguration().create_indexes is False + c = SynapseClientConfiguration() + assert c.create_indexes is False + assert c.has_case_sensitive_identifiers is False + assert c.staging_use_msi is False + + +def test_synapse_factory() -> None: + schema = Schema("schema") + dest = synapse() + client = dest.client(schema, SynapseClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is False + assert client.config.staging_use_msi is False + assert client.config.has_case_sensitive_identifiers is False + assert client.capabilities.has_case_sensitive_identifiers is False + assert client.capabilities.casefold_identifier is str + + # set args explicitly + dest = synapse(has_case_sensitive_identifiers=True, create_indexes=True, staging_use_msi=True) + client = dest.client(schema, SynapseClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.staging_use_msi is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str + + # set args via config + os.environ["DESTINATION__CREATE_INDEXES"] = "True" + os.environ["DESTINATION__STAGING_USE_MSI"] = "True" + os.environ["DESTINATION__HAS_CASE_SENSITIVE_IDENTIFIERS"] = "True" + dest = synapse() + client = dest.client(schema, SynapseClientConfiguration()._bind_dataset_name("dataset")) + assert client.config.create_indexes is True + assert client.config.staging_use_msi is True + assert client.config.has_case_sensitive_identifiers is True + assert client.capabilities.has_case_sensitive_identifiers is True + assert client.capabilities.casefold_identifier is str def test_parse_native_representation() -> None: diff --git a/tests/load/synapse/test_synapse_table_builder.py b/tests/load/synapse/test_synapse_table_builder.py index 9ee2ebe202..6e3e69b3a3 100644 --- a/tests/load/synapse/test_synapse_table_builder.py +++ b/tests/load/synapse/test_synapse_table_builder.py @@ -7,17 +7,18 @@ from dlt.common.utils import uniq_id from dlt.common.schema import Schema, TColumnHint -from dlt.destinations.impl.synapse.synapse import SynapseClient +from dlt.destinations.impl.synapse import capabilities +from dlt.destinations.impl.synapse.synapse import ( + SynapseClient, + HINT_TO_SYNAPSE_ATTR, + TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR, +) from dlt.destinations.impl.synapse.configuration import ( SynapseClientConfiguration, SynapseCredentials, ) from tests.load.utils import TABLE_UPDATE, empty_schema -from dlt.destinations.impl.synapse.synapse import ( - HINT_TO_SYNAPSE_ATTR, - TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR, -) # mark all tests as essential, do not remove pytestmark = pytest.mark.essential @@ -31,6 +32,7 @@ def client(empty_schema: Schema) -> SynapseClient: SynapseClientConfiguration(credentials=SynapseCredentials())._bind_dataset_name( dataset_name="test_" + uniq_id() ), + capabilities(), ) assert client.config.create_indexes is False return client @@ -44,6 +46,7 @@ def client_with_indexes_enabled(empty_schema: Schema) -> SynapseClient: SynapseClientConfiguration( credentials=SynapseCredentials(), create_indexes=True )._bind_dataset_name(dataset_name="test_" + uniq_id()), + capabilities(), ) assert client.config.create_indexes is True return client diff --git a/tests/load/test_sql_client.py b/tests/load/test_sql_client.py index 26d7884179..7fb7cc26c4 100644 --- a/tests/load/test_sql_client.py +++ b/tests/load/test_sql_client.py @@ -490,7 
+490,7 @@ def test_transaction_isolation(client: SqlJobClientBase) -> None:
     def test_thread(thread_id: Decimal) -> None:
         # make a copy of the sql_client
         thread_client = client.sql_client.__class__(
-            client.sql_client.dataset_name, client.sql_client.credentials
+            client.sql_client.dataset_name, client.sql_client.credentials, client.capabilities
         )
         with thread_client:
             with thread_client.begin_transaction():
diff --git a/tests/load/weaviate/test_pipeline.py b/tests/load/weaviate/test_pipeline.py
index ee42ab59d8..507a4c4f8d 100644
--- a/tests/load/weaviate/test_pipeline.py
+++ b/tests/load/weaviate/test_pipeline.py
@@ -4,6 +4,10 @@
 import dlt
 from dlt.common import json
+from dlt.common.schema.exceptions import (
+    SchemaCorruptedException,
+    SchemaIdentifierNormalizationClash,
+)
 from dlt.common.utils import uniq_id
 
 from dlt.destinations.impl.weaviate import weaviate_adapter
@@ -391,7 +395,7 @@ def test_vectorize_property_without_data() -> None:
         primary_key="vAlue",
         columns={"vAlue": {"data_type": "text"}},
     )
-    assert isinstance(pipe_ex.value.__context__, PropertyNameConflict)
+    assert isinstance(pipe_ex.value.__context__, SchemaIdentifierNormalizationClash)
 
     # set the naming convention to case insensitive
     os.environ["SCHEMA__NAMING"] = "dlt.destinations.impl.weaviate.ci_naming"
diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py
index 8a1f3c51ad..730c2675f3 100644
--- a/tests/load/weaviate/test_weaviate_client.py
+++ b/tests/load/weaviate/test_weaviate_client.py
@@ -5,6 +5,7 @@
 from dlt.common.schema import Schema
 from dlt.common.configuration.container import Container
 from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
+from dlt.common.schema.exceptions import SchemaIdentifierNormalizationClash
 from dlt.common.utils import uniq_id
 from dlt.common.schema.typing import TWriteDisposition, TColumnSchema, TTableSchemaColumns
 
@@ -119,8 +120,13 @@ def test_case_sensitive_properties_create(client: WeaviateClient) -> None:
         )
     )
     client.schema._bump_version()
-    with pytest.raises(PropertyNameConflict):
+    with pytest.raises(SchemaIdentifierNormalizationClash) as clash_ex:
         client.update_stored_schema()
+    assert clash_ex.value.identifier_type == "column"
+    assert clash_ex.value.identifier_name == "coL1"
+    assert clash_ex.value.conflict_identifier_name == "col1"
+    assert clash_ex.value.table_name == "ColClass"
+    assert clash_ex.value.naming_name == "dlt.destinations.impl.weaviate.naming"
 
 
 def test_case_insensitive_properties_create(ci_client: WeaviateClient) -> None:
@@ -163,7 +169,7 @@ def test_case_sensitive_properties_add(client: WeaviateClient) -> None:
         )
     )
     client.schema._bump_version()
-    with pytest.raises(PropertyNameConflict):
+    with pytest.raises(SchemaIdentifierNormalizationClash):
         client.update_stored_schema()
 
     # _, table_columns = client.get_storage_table("ColClass")
@@ -179,12 +185,13 @@ def test_load_case_sensitive_data(client: WeaviateClient, file_storage: FileStor
     client.schema.update_table(new_table(class_name, columns=[table_create["col1"]]))
     client.schema._bump_version()
     client.update_stored_schema()
-    # prepare a data item where is name clash due to Weaviate being CI
+    # prepare a data item with a name clash due to Weaviate being CS
     data_clash = {"col1": 72187328, "coL1": 726171}
     # write row
     with io.BytesIO() as f:
         write_dataset(client, f, [data_clash], table_create)
         query = f.getvalue().decode()
+    class_name = client.schema.naming.normalize_table_identifier(class_name)
     with 
pytest.raises(PropertyNameConflict): expect_load_file(client, file_storage, query, class_name) @@ -210,6 +217,7 @@ def test_load_case_sensitive_data_ci(ci_client: WeaviateClient, file_storage: Fi with io.BytesIO() as f: write_dataset(ci_client, f, [data_clash], table_create) query = f.getvalue().decode() + class_name = ci_client.schema.naming.normalize_table_identifier(class_name) expect_load_file(ci_client, file_storage, query, class_name) response = ci_client.query_class(class_name, ["col1"]).do() objects = response["data"]["Get"][ci_client.make_qualified_class_name(class_name)] diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 2c2ba0292e..26eae36dd6 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -19,6 +19,7 @@ TStoredSchema, ) from dlt.common.configuration.resolve import resolve_configuration +from dlt.destinations.impl.duckdb import capabilities from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient @@ -74,7 +75,9 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: DuckDbClientConfiguration()._bind_dataset_name(dataset_name=GITHUB_DATASET), sections=("destination", "duckdb"), ) - with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: + with DuckDbSqlClient( + GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + ) as client: rows = client.execute_sql(f"SELECT * FROM {LOADS_TABLE_NAME}") # make sure we have just 4 columns assert len(rows[0]) == 4 @@ -115,7 +118,9 @@ def test_pipeline_with_dlt_update(test_storage: FileStorage) -> None: ) assert "_version_hash" in state_dict - with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: + with DuckDbSqlClient( + GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + ) as client: rows = client.execute_sql( f"SELECT * FROM {LOADS_TABLE_NAME} ORDER BY inserted_at" ) @@ -195,7 +200,9 @@ def test_load_package_with_dlt_update(test_storage: FileStorage) -> None: DuckDbClientConfiguration()._bind_dataset_name(dataset_name=GITHUB_DATASET), sections=("destination", "duckdb"), ) - with DuckDbSqlClient(GITHUB_DATASET, duckdb_cfg.credentials) as client: + with DuckDbSqlClient( + GITHUB_DATASET, duckdb_cfg.credentials, capabilities() + ) as client: rows = client.execute_sql("SELECT * FROM issues") assert len(rows) == 70 github_schema = json.loads(