From 3cdab60de2119e3efea1037e50aeeb582073193c Mon Sep 17 00:00:00 2001
From: Steinthor Palsson
Date: Sat, 11 Nov 2023 15:15:37 -0500
Subject: [PATCH 01/29] Move destination modules to subfolder

---
 dlt/common/destination/reference.py | 2 +-
 dlt/destinations/impl/__init__.py | 0
 dlt/destinations/{ => impl}/athena/__init__.py | 6 ++----
 dlt/destinations/{ => impl}/athena/athena.py | 4 ++--
 dlt/destinations/{ => impl}/athena/configuration.py | 0
 dlt/destinations/{ => impl}/bigquery/README.md | 0
 dlt/destinations/{ => impl}/bigquery/__init__.py | 6 +++---
 dlt/destinations/{ => impl}/bigquery/bigquery.py | 6 +++---
 dlt/destinations/{ => impl}/bigquery/configuration.py | 0
 dlt/destinations/{ => impl}/bigquery/sql_client.py | 2 +-
 dlt/destinations/{ => impl}/duckdb/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/duckdb/configuration.py | 0
 dlt/destinations/{ => impl}/duckdb/duck.py | 6 +++---
 dlt/destinations/{ => impl}/duckdb/sql_client.py | 4 ++--
 dlt/destinations/{ => impl}/dummy/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/dummy/configuration.py | 0
 dlt/destinations/{ => impl}/dummy/dummy.py | 4 ++--
 dlt/destinations/{ => impl}/filesystem/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/filesystem/configuration.py | 0
 dlt/destinations/{ => impl}/filesystem/filesystem.py | 4 ++--
 dlt/destinations/{ => impl}/motherduck/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/motherduck/configuration.py | 2 +-
 dlt/destinations/{ => impl}/motherduck/motherduck.py | 8 ++++----
 dlt/destinations/{ => impl}/motherduck/sql_client.py | 6 +++---
 dlt/destinations/{ => impl}/mssql/README.md | 0
 dlt/destinations/{ => impl}/mssql/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/mssql/configuration.py | 0
 dlt/destinations/{ => impl}/mssql/mssql.py | 6 +++---
 dlt/destinations/{ => impl}/mssql/sql_client.py | 4 ++--
 dlt/destinations/{ => impl}/postgres/README.md | 0
 dlt/destinations/{ => impl}/postgres/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/postgres/configuration.py | 0
 dlt/destinations/{ => impl}/postgres/postgres.py | 6 +++---
 dlt/destinations/{ => impl}/postgres/sql_client.py | 4 ++--
 dlt/destinations/{ => impl}/qdrant/__init__.py | 6 +++---
 dlt/destinations/{ => impl}/qdrant/configuration.py | 0
 dlt/destinations/{ => impl}/qdrant/qdrant_adapter.py | 0
 dlt/destinations/{ => impl}/qdrant/qdrant_client.py | 8 ++++----
 dlt/destinations/{ => impl}/redshift/README.md | 0
 dlt/destinations/{ => impl}/redshift/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/redshift/configuration.py | 2 +-
 dlt/destinations/{ => impl}/redshift/redshift.py | 6 +++---
 dlt/destinations/{ => impl}/snowflake/__init__.py | 4 ++--
 dlt/destinations/{ => impl}/snowflake/configuration.py | 0
 dlt/destinations/{ => impl}/snowflake/snowflake.py | 8 ++++----
 dlt/destinations/{ => impl}/snowflake/sql_client.py | 4 ++--
 dlt/destinations/{ => impl}/weaviate/README.md | 0
 dlt/destinations/{ => impl}/weaviate/__init__.py | 6 +++---
 dlt/destinations/{ => impl}/weaviate/ci_naming.py | 0
 dlt/destinations/{ => impl}/weaviate/configuration.py | 0
 dlt/destinations/{ => impl}/weaviate/exceptions.py | 0
 dlt/destinations/{ => impl}/weaviate/naming.py | 0
 dlt/destinations/{ => impl}/weaviate/weaviate_adapter.py | 0
 dlt/destinations/{ => impl}/weaviate/weaviate_client.py | 8 ++++----
 tests/common/data_writers/test_data_writers.py | 2 +-
 tests/helpers/dbt_tests/local/test_dbt_utils.py | 2 +-
 tests/helpers/dbt_tests/test_runner_dbt_versions.py | 4 ++--
 tests/load/bigquery/test_bigquery_client.py | 2 +-
 tests/load/bigquery/test_bigquery_table_builder.py | 4 ++--
 tests/load/duckdb/test_duckdb_client.py | 2 +-
 tests/load/duckdb/test_duckdb_table_builder.py | 4 ++--
 tests/load/duckdb/test_motherduck_client.py | 2 +-
 tests/load/filesystem/test_filesystem_client.py | 2 +-
 tests/load/filesystem/utils.py | 4 ++--
 tests/load/mssql/test_mssql_credentials.py | 2 +-
 tests/load/mssql/test_mssql_table_builder.py | 4 ++--
 tests/load/pipeline/test_filesystem_pipeline.py | 2 +-
 tests/load/pipeline/utils.py | 2 +-
 tests/load/postgres/test_postgres_client.py | 6 +++---
 tests/load/postgres/test_postgres_table_builder.py | 4 ++--
 tests/load/qdrant/test_pipeline.py | 4 ++--
 tests/load/qdrant/utils.py | 2 +-
 tests/load/redshift/test_redshift_client.py | 4 ++--
 tests/load/redshift/test_redshift_table_builder.py | 4 ++--
 tests/load/snowflake/test_snowflake_configuration.py | 2 +-
 tests/load/snowflake/test_snowflake_table_builder.py | 4 ++--
 tests/load/test_dummy_client.py | 6 +++---
 tests/load/test_insert_job_client.py | 2 +-
 tests/load/utils.py | 2 +-
 tests/load/weaviate/test_naming.py | 4 ++--
 tests/load/weaviate/test_pipeline.py | 8 ++++----
 tests/load/weaviate/test_weaviate_client.py | 6 +++---
 tests/load/weaviate/utils.py | 4 ++--
 tests/normalize/utils.py | 10 +++++-----
 tests/pipeline/test_dlt_versions.py | 4 ++--
 tests/tools/clean_redshift.py | 4 ++--
 86 files changed, 138 insertions(+), 140 deletions(-)
 create mode 100644 dlt/destinations/impl/__init__.py
 rename dlt/destinations/{ => impl}/athena/__init__.py (93%)
 rename dlt/destinations/{ => impl}/athena/athena.py (99%)
 rename dlt/destinations/{ => impl}/athena/configuration.py (100%)
 rename dlt/destinations/{ => impl}/bigquery/README.md (100%)
 rename dlt/destinations/{ => impl}/bigquery/__init__.py (90%)
 rename dlt/destinations/{ => impl}/bigquery/bigquery.py (98%)
 rename dlt/destinations/{ => impl}/bigquery/configuration.py (100%)
 rename dlt/destinations/{ => impl}/bigquery/sql_client.py (99%)
 rename dlt/destinations/{ => impl}/duckdb/__init__.py (93%)
 rename dlt/destinations/{ => impl}/duckdb/configuration.py (100%)
 rename dlt/destinations/{ => impl}/duckdb/duck.py (96%)
 rename dlt/destinations/{ => impl}/duckdb/sql_client.py (98%)
 rename dlt/destinations/{ => impl}/dummy/__init__.py (92%)
 rename dlt/destinations/{ => impl}/dummy/configuration.py (100%)
 rename dlt/destinations/{ => impl}/dummy/dummy.py (97%)
 rename dlt/destinations/{ => impl}/filesystem/__init__.py (87%)
 rename dlt/destinations/{ => impl}/filesystem/configuration.py (100%)
 rename dlt/destinations/{ => impl}/filesystem/filesystem.py (98%)
 rename dlt/destinations/{ => impl}/motherduck/__init__.py (92%)
 rename dlt/destinations/{ => impl}/motherduck/configuration.py (97%)
 rename dlt/destinations/{ => impl}/motherduck/motherduck.py (70%)
 rename dlt/destinations/{ => impl}/motherduck/sql_client.py (83%)
 rename dlt/destinations/{ => impl}/mssql/README.md (100%)
 rename dlt/destinations/{ => impl}/mssql/__init__.py (94%)
 rename dlt/destinations/{ => impl}/mssql/configuration.py (100%)
 rename dlt/destinations/{ => impl}/mssql/mssql.py (97%)
 rename dlt/destinations/{ => impl}/mssql/sql_client.py (97%)
 rename dlt/destinations/{ => impl}/postgres/README.md (100%)
 rename dlt/destinations/{ => impl}/postgres/__init__.py (93%)
 rename dlt/destinations/{ => impl}/postgres/configuration.py (100%)
 rename dlt/destinations/{ => impl}/postgres/postgres.py (95%)
 rename dlt/destinations/{ => impl}/postgres/sql_client.py (97%)
 rename dlt/destinations/{ => impl}/qdrant/__init__.py (86%)
 rename dlt/destinations/{ => impl}/qdrant/configuration.py (100%)
 rename dlt/destinations/{ => impl}/qdrant/qdrant_adapter.py (100%)
 rename dlt/destinations/{ => impl}/qdrant/qdrant_client.py (98%)
 rename dlt/destinations/{ => impl}/redshift/README.md (100%)
 rename dlt/destinations/{ => impl}/redshift/__init__.py (93%)
 rename dlt/destinations/{ => impl}/redshift/configuration.py (88%)
 rename dlt/destinations/{ => impl}/redshift/redshift.py (97%)
 rename dlt/destinations/{ => impl}/snowflake/__init__.py (92%)
 rename dlt/destinations/{ => impl}/snowflake/configuration.py (100%)
 rename dlt/destinations/{ => impl}/snowflake/snowflake.py (97%)
 rename dlt/destinations/{ => impl}/snowflake/sql_client.py (98%)
 rename dlt/destinations/{ => impl}/weaviate/README.md (100%)
 rename dlt/destinations/{ => impl}/weaviate/__init__.py (86%)
 rename dlt/destinations/{ => impl}/weaviate/ci_naming.py (100%)
 rename dlt/destinations/{ => impl}/weaviate/configuration.py (100%)
 rename dlt/destinations/{ => impl}/weaviate/exceptions.py (100%)
 rename dlt/destinations/{ => impl}/weaviate/naming.py (100%)
 rename dlt/destinations/{ => impl}/weaviate/weaviate_adapter.py (100%)
 rename dlt/destinations/{ => impl}/weaviate/weaviate_client.py (98%)

diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py
index 13172b41e9..ded654e965 100644
--- a/dlt/common/destination/reference.py
+++ b/dlt/common/destination/reference.py
@@ -373,7 +373,7 @@ def from_name(destination: TDestinationReferenceArg) -> "DestinationReference":
                 destination_ref = cast(DestinationReference, import_module(destination))
             else:
                 # from known location
-                destination_ref = cast(DestinationReference, import_module(f"dlt.destinations.{destination}"))
+                destination_ref = cast(DestinationReference, import_module(f"dlt.destinations.impl.{destination}"))
         except ImportError:
             if "." in destination:
                 raise UnknownDestinationModule(destination)
diff --git a/dlt/destinations/impl/__init__.py b/dlt/destinations/impl/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/dlt/destinations/athena/__init__.py b/dlt/destinations/impl/athena/__init__.py
similarity index 93%
rename from dlt/destinations/athena/__init__.py
rename to dlt/destinations/impl/athena/__init__.py
index 1fd7f14d57..10157a4a87 100644
--- a/dlt/destinations/athena/__init__.py
+++ b/dlt/destinations/impl/athena/__init__.py
@@ -7,7 +7,7 @@
 from dlt.common.data_writers.escape import escape_athena_identifier
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 
-from dlt.destinations.athena.configuration import AthenaClientConfiguration
+from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration
 from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration
 
 @with_config(spec=AthenaClientConfiguration, sections=(known_sections.DESTINATION, "athena",))
@@ -41,11 +41,9 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.athena.athena import AthenaClient
+    from dlt.destinations.impl.athena.athena import AthenaClient
     return AthenaClient(schema, _configure(initial_config))  # type: ignore
 
 
 def spec() -> Type[DestinationClientConfiguration]:
     return AthenaClientConfiguration
-
-
diff --git a/dlt/destinations/athena/athena.py b/dlt/destinations/impl/athena/athena.py
similarity index 99%
rename from dlt/destinations/athena/athena.py
rename to dlt/destinations/impl/athena/athena.py
index 44d020c127..f675e7a496 100644
--- a/dlt/destinations/athena/athena.py
+++ b/dlt/destinations/impl/athena/athena.py
@@ -27,11 +27,11 @@
 from dlt.destinations.typing import DBApi, DBTransaction
 from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation, LoadJobTerminalException
 
-from dlt.destinations.athena import capabilities
+from dlt.destinations.impl.athena import capabilities
 from dlt.destinations.sql_client import SqlClientBase, DBApiCursorImpl, raise_database_error, raise_open_connection_error
 from dlt.destinations.typing import DBApiCursor
 from dlt.destinations.job_client_impl import SqlJobClientWithStaging
-from dlt.destinations.athena.configuration import AthenaClientConfiguration
+from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration
 from dlt.destinations.type_mapping import TypeMapper
 from dlt.destinations import path_utils
diff --git a/dlt/destinations/athena/configuration.py b/dlt/destinations/impl/athena/configuration.py
similarity index 100%
rename from dlt/destinations/athena/configuration.py
rename to dlt/destinations/impl/athena/configuration.py
diff --git a/dlt/destinations/bigquery/README.md b/dlt/destinations/impl/bigquery/README.md
similarity index 100%
rename from dlt/destinations/bigquery/README.md
rename to dlt/destinations/impl/bigquery/README.md
diff --git a/dlt/destinations/bigquery/__init__.py b/dlt/destinations/impl/bigquery/__init__.py
similarity index 90%
rename from dlt/destinations/bigquery/__init__.py
rename to dlt/destinations/impl/bigquery/__init__.py
index 3d97e9a929..e694cccc41 100644
--- a/dlt/destinations/bigquery/__init__.py
+++ b/dlt/destinations/impl/bigquery/__init__.py
@@ -8,7 +8,7 @@
 from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 
-from dlt.destinations.bigquery.configuration import BigQueryClientConfiguration
+from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration
 
 
 @with_config(spec=BigQueryClientConfiguration, sections=(known_sections.DESTINATION, "bigquery",))
@@ -39,10 +39,10 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.bigquery.bigquery import BigQueryClient
+    from dlt.destinations.impl.bigquery.bigquery import BigQueryClient
 
     return BigQueryClient(schema, _configure(initial_config))  # type: ignore
 
 
 def spec() -> Type[DestinationClientConfiguration]:
-    return BigQueryClientConfiguration
\ No newline at end of file
+    return BigQueryClientConfiguration
diff --git a/dlt/destinations/bigquery/bigquery.py b/dlt/destinations/impl/bigquery/bigquery.py
similarity index 98%
rename from dlt/destinations/bigquery/bigquery.py
rename to dlt/destinations/impl/bigquery/bigquery.py
index 9cc7591f57..440123e46d 100644
--- a/dlt/destinations/bigquery/bigquery.py
+++ b/dlt/destinations/impl/bigquery/bigquery.py
@@ -17,9 +17,9 @@
 from dlt.destinations.job_client_impl import SqlJobClientWithStaging
 from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate, DestinationTransientException, LoadJobNotExistsException, LoadJobTerminalException
 
-from dlt.destinations.bigquery import capabilities
-from dlt.destinations.bigquery.configuration import BigQueryClientConfiguration
-from dlt.destinations.bigquery.sql_client import BigQuerySqlClient, BQ_TERMINAL_REASONS
+from dlt.destinations.impl.bigquery import capabilities
+from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration
+from dlt.destinations.impl.bigquery.sql_client import BigQuerySqlClient, BQ_TERMINAL_REASONS
 from dlt.destinations.sql_jobs import SqlMergeJob, SqlStagingCopyJob, SqlJobParams
 from dlt.destinations.job_impl import NewReferenceJob
 from dlt.destinations.sql_client import SqlClientBase
diff --git a/dlt/destinations/bigquery/configuration.py b/dlt/destinations/impl/bigquery/configuration.py
similarity index 100%
rename from dlt/destinations/bigquery/configuration.py
rename to dlt/destinations/impl/bigquery/configuration.py
diff --git a/dlt/destinations/bigquery/sql_client.py b/dlt/destinations/impl/bigquery/sql_client.py
similarity index 99%
rename from dlt/destinations/bigquery/sql_client.py
rename to dlt/destinations/impl/bigquery/sql_client.py
index 3d6eb19833..4939add0da 100644
--- a/dlt/destinations/bigquery/sql_client.py
+++ b/dlt/destinations/impl/bigquery/sql_client.py
@@ -17,7 +17,7 @@
 
 from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation
 from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error
-from dlt.destinations.bigquery import capabilities
+from dlt.destinations.impl.bigquery import capabilities
 
 # terminal reasons as returned in BQ gRPC error response
 # https://cloud.google.com/bigquery/docs/error-messages
diff --git a/dlt/destinations/duckdb/__init__.py b/dlt/destinations/impl/duckdb/__init__.py
similarity index 93%
rename from dlt/destinations/duckdb/__init__.py
rename to dlt/destinations/impl/duckdb/__init__.py
index d9882cc0eb..b2a57d0788 100644
--- a/dlt/destinations/duckdb/__init__.py
+++ b/dlt/destinations/impl/duckdb/__init__.py
@@ -8,7 +8,7 @@
 from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 
-from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration
+from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration
 
 
 @with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, "duckdb",))
@@ -41,7 +41,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.duckdb.duck import DuckDbClient
+    from dlt.destinations.impl.duckdb.duck import DuckDbClient
 
     return DuckDbClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/duckdb/configuration.py b/dlt/destinations/impl/duckdb/configuration.py
similarity index 100%
rename from dlt/destinations/duckdb/configuration.py
rename to dlt/destinations/impl/duckdb/configuration.py
diff --git a/dlt/destinations/duckdb/duck.py b/dlt/destinations/impl/duckdb/duck.py
similarity index 96%
rename from dlt/destinations/duckdb/duck.py
rename to dlt/destinations/impl/duckdb/duck.py
index 4a2e54f2b6..6e6ec359fe 100644
--- a/dlt/destinations/duckdb/duck.py
+++ b/dlt/destinations/impl/duckdb/duck.py
@@ -12,9 +12,9 @@
 
 from dlt.destinations.insert_job_client import InsertValuesJobClient
 
-from dlt.destinations.duckdb import capabilities
-from dlt.destinations.duckdb.sql_client import DuckDbSqlClient
-from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration
+from dlt.destinations.impl.duckdb import capabilities
+from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient
+from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration
 
 from dlt.destinations.type_mapping import TypeMapper
 
diff --git a/dlt/destinations/duckdb/sql_client.py b/dlt/destinations/impl/duckdb/sql_client.py
similarity index 98%
rename from dlt/destinations/duckdb/sql_client.py
rename to dlt/destinations/impl/duckdb/sql_client.py
index cd2160f676..cb4e1678a2 100644
--- a/dlt/destinations/duckdb/sql_client.py
+++ b/dlt/destinations/impl/duckdb/sql_client.py
@@ -8,8 +8,8 @@
 
 from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame
 from dlt.destinations.sql_client import SqlClientBase, DBApiCursorImpl, raise_database_error, raise_open_connection_error
-from dlt.destinations.duckdb import capabilities
-from dlt.destinations.duckdb.configuration import DuckDbBaseCredentials
+from dlt.destinations.impl.duckdb import capabilities
+from dlt.destinations.impl.duckdb.configuration import DuckDbBaseCredentials
 
 
 class DuckDBDBApiCursorImpl(DBApiCursorImpl):
diff --git a/dlt/destinations/dummy/__init__.py b/dlt/destinations/impl/dummy/__init__.py
similarity index 92%
rename from dlt/destinations/dummy/__init__.py
rename to dlt/destinations/impl/dummy/__init__.py
index 7131f0109a..2c24b3b16f 100644
--- a/dlt/destinations/dummy/__init__.py
+++ b/dlt/destinations/impl/dummy/__init__.py
@@ -6,7 +6,7 @@
 from dlt.common.destination import DestinationCapabilitiesContext
 from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration
 
-from dlt.destinations.dummy.configuration import DummyClientConfiguration
+from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration
 
 
 @with_config(spec=DummyClientConfiguration, sections=(known_sections.DESTINATION, "dummy",))
@@ -34,7 +34,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.dummy.dummy import DummyClient
+    from dlt.destinations.impl.dummy.dummy import DummyClient
 
     return DummyClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/dummy/configuration.py b/dlt/destinations/impl/dummy/configuration.py
similarity index 100%
rename from dlt/destinations/dummy/configuration.py
rename to dlt/destinations/impl/dummy/configuration.py
diff --git a/dlt/destinations/dummy/dummy.py b/dlt/destinations/impl/dummy/dummy.py
similarity index 97%
rename from dlt/destinations/dummy/dummy.py
rename to dlt/destinations/impl/dummy/dummy.py
index c8cac05d3a..abfd517c76 100644
--- a/dlt/destinations/dummy/dummy.py
+++ b/dlt/destinations/impl/dummy/dummy.py
@@ -13,8 +13,8 @@
 
 from dlt.destinations.exceptions import (LoadJobNotExistsException, LoadJobInvalidStateTransitionException, DestinationTerminalException, DestinationTransientException)
 
-from dlt.destinations.dummy import capabilities
-from dlt.destinations.dummy.configuration import DummyClientConfiguration
+from dlt.destinations.impl.dummy import capabilities
+from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration
 
 
 class LoadDummyJob(LoadJob, FollowupJob):
diff --git a/dlt/destinations/filesystem/__init__.py b/dlt/destinations/impl/filesystem/__init__.py
similarity index 87%
rename from dlt/destinations/filesystem/__init__.py
rename to dlt/destinations/impl/filesystem/__init__.py
index 3dc6c62480..abe2c4eca9 100644
--- a/dlt/destinations/filesystem/__init__.py
+++ b/dlt/destinations/impl/filesystem/__init__.py
@@ -6,7 +6,7 @@
 from dlt.common.destination import DestinationCapabilitiesContext
 from dlt.common.destination.reference import JobClientBase, DestinationClientDwhWithStagingConfiguration
 
-from dlt.destinations.filesystem.configuration import FilesystemDestinationClientConfiguration
+from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration
 
 
 @with_config(spec=FilesystemDestinationClientConfiguration, sections=(known_sections.DESTINATION, "filesystem",))
@@ -20,7 +20,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientDwhWithStagingConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.filesystem.filesystem import FilesystemClient
+    from dlt.destinations.impl.filesystem.filesystem import FilesystemClient
 
     return FilesystemClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/filesystem/configuration.py b/dlt/destinations/impl/filesystem/configuration.py
similarity index 100%
rename from dlt/destinations/filesystem/configuration.py
rename to dlt/destinations/impl/filesystem/configuration.py
diff --git a/dlt/destinations/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py
similarity index 98%
rename from dlt/destinations/filesystem/filesystem.py
rename to dlt/destinations/impl/filesystem/filesystem.py
index 766f384024..fe349aac6b 100644
--- a/dlt/destinations/filesystem/filesystem.py
+++ b/dlt/destinations/impl/filesystem/filesystem.py
@@ -12,8 +12,8 @@
 from dlt.common.destination.reference import NewLoadJob, TLoadJobState, LoadJob, JobClientBase, FollowupJob, WithStagingDataset
 
 from dlt.destinations.job_impl import EmptyLoadJob
-from dlt.destinations.filesystem import capabilities
-from dlt.destinations.filesystem.configuration import FilesystemDestinationClientConfiguration
+from dlt.destinations.impl.filesystem import capabilities
+from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration
 from dlt.destinations.job_impl import NewReferenceJob
 from dlt.destinations import path_utils
 
diff --git a/dlt/destinations/motherduck/__init__.py b/dlt/destinations/impl/motherduck/__init__.py
similarity index 92%
rename from dlt/destinations/motherduck/__init__.py
rename to dlt/destinations/impl/motherduck/__init__.py
index eae67eaa74..4649ab9bf8 100644
--- a/dlt/destinations/motherduck/__init__.py
+++ b/dlt/destinations/impl/motherduck/__init__.py
@@ -8,7 +8,7 @@
 from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 
-from dlt.destinations.motherduck.configuration import MotherDuckClientConfiguration
+from dlt.destinations.impl.motherduck.configuration import MotherDuckClientConfiguration
 
 
 @with_config(spec=MotherDuckClientConfiguration, sections=(known_sections.DESTINATION, "motherduck",))
@@ -39,7 +39,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.motherduck.motherduck import MotherDuckClient
+    from dlt.destinations.impl.motherduck.motherduck import MotherDuckClient
 
     return MotherDuckClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/motherduck/configuration.py b/dlt/destinations/impl/motherduck/configuration.py
similarity index 97%
rename from dlt/destinations/motherduck/configuration.py
rename to dlt/destinations/impl/motherduck/configuration.py
index 18d480c945..a376f1a5aa 100644
--- a/dlt/destinations/motherduck/configuration.py
+++ b/dlt/destinations/impl/motherduck/configuration.py
@@ -7,7 +7,7 @@
 from dlt.common.utils import digest128
 from dlt.common.configuration.exceptions import ConfigurationValueError
 
-from dlt.destinations.duckdb.configuration import DuckDbBaseCredentials
+from dlt.destinations.impl.duckdb.configuration import DuckDbBaseCredentials
 
 MOTHERDUCK_DRIVERNAME = "md"
 
diff --git a/dlt/destinations/motherduck/motherduck.py b/dlt/destinations/impl/motherduck/motherduck.py
similarity index 70%
rename from dlt/destinations/motherduck/motherduck.py
rename to dlt/destinations/impl/motherduck/motherduck.py
index 93c0ed163b..9822f2b7b6 100644
--- a/dlt/destinations/motherduck/motherduck.py
+++ b/dlt/destinations/impl/motherduck/motherduck.py
@@ -4,10 +4,10 @@
 
 from dlt.common.schema import Schema
 
-from dlt.destinations.duckdb.duck import DuckDbClient
-from dlt.destinations.motherduck import capabilities
-from dlt.destinations.motherduck.sql_client import MotherDuckSqlClient
-from dlt.destinations.motherduck.configuration import MotherDuckClientConfiguration
+from dlt.destinations.impl.duckdb.duck import DuckDbClient
+from dlt.destinations.impl.motherduck import capabilities
+from dlt.destinations.impl.motherduck.sql_client import MotherDuckSqlClient
+from dlt.destinations.impl.motherduck.configuration import MotherDuckClientConfiguration
 
 
 class MotherDuckClient(DuckDbClient):
diff --git a/dlt/destinations/motherduck/sql_client.py b/dlt/destinations/impl/motherduck/sql_client.py
similarity index 83%
rename from dlt/destinations/motherduck/sql_client.py
rename to dlt/destinations/impl/motherduck/sql_client.py
index 2fc664a2e8..672c377fd9 100644
--- a/dlt/destinations/motherduck/sql_client.py
+++ b/dlt/destinations/impl/motherduck/sql_client.py
@@ -8,9 +8,9 @@
 
 from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame
 from dlt.destinations.sql_client import SqlClientBase, DBApiCursorImpl, raise_database_error, raise_open_connection_error
-from dlt.destinations.duckdb.sql_client import DuckDbSqlClient, DuckDBDBApiCursorImpl
-from dlt.destinations.motherduck import capabilities
-from dlt.destinations.motherduck.configuration import MotherDuckCredentials
+from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient, DuckDBDBApiCursorImpl
+from dlt.destinations.impl.motherduck import capabilities
+from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials
 
 
 class MotherDuckSqlClient(DuckDbSqlClient):
diff --git a/dlt/destinations/mssql/README.md b/dlt/destinations/impl/mssql/README.md
similarity index 100%
rename from dlt/destinations/mssql/README.md
rename to dlt/destinations/impl/mssql/README.md
diff --git a/dlt/destinations/mssql/__init__.py b/dlt/destinations/impl/mssql/__init__.py
similarity index 94%
rename from dlt/destinations/mssql/__init__.py
rename to dlt/destinations/impl/mssql/__init__.py
index 56051a324e..8f9f92d4eb 100644
--- a/dlt/destinations/mssql/__init__.py
+++ b/dlt/destinations/impl/mssql/__init__.py
@@ -9,7 +9,7 @@
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 from dlt.common.wei import EVM_DECIMAL_PRECISION
 
-from dlt.destinations.mssql.configuration import MsSqlClientConfiguration
+from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration
 
 
 @with_config(spec=MsSqlClientConfiguration, sections=(known_sections.DESTINATION, "mssql",))
@@ -43,7 +43,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.mssql.mssql import MsSqlClient
+    from dlt.destinations.impl.mssql.mssql import MsSqlClient
 
     return MsSqlClient(schema, _configure(initial_config))  # type: ignore[arg-type]
diff --git a/dlt/destinations/mssql/configuration.py b/dlt/destinations/impl/mssql/configuration.py
similarity index 100%
rename from dlt/destinations/mssql/configuration.py
rename to dlt/destinations/impl/mssql/configuration.py
diff --git a/dlt/destinations/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py
similarity index 97%
rename from dlt/destinations/mssql/mssql.py
rename to dlt/destinations/impl/mssql/mssql.py
index cd999441ff..851122f20c 100644
--- a/dlt/destinations/mssql/mssql.py
+++ b/dlt/destinations/impl/mssql/mssql.py
@@ -12,9 +12,9 @@
 
 from dlt.destinations.insert_job_client import InsertValuesJobClient
 
-from dlt.destinations.mssql import capabilities
-from dlt.destinations.mssql.sql_client import PyOdbcMsSqlClient
-from dlt.destinations.mssql.configuration import MsSqlClientConfiguration
+from dlt.destinations.impl.mssql import capabilities
+from dlt.destinations.impl.mssql.sql_client import PyOdbcMsSqlClient
+from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration
 from dlt.destinations.sql_client import SqlClientBase
 from dlt.destinations.type_mapping import TypeMapper
 
diff --git a/dlt/destinations/mssql/sql_client.py b/dlt/destinations/impl/mssql/sql_client.py
similarity index 97%
rename from dlt/destinations/mssql/sql_client.py
rename to dlt/destinations/impl/mssql/sql_client.py
index 4dd983a334..5372fa3626 100644
--- a/dlt/destinations/mssql/sql_client.py
+++ b/dlt/destinations/impl/mssql/sql_client.py
@@ -13,8 +13,8 @@
 
 from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction
 from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error
-from dlt.destinations.mssql.configuration import MsSqlCredentials
-from dlt.destinations.mssql import capabilities
+from dlt.destinations.impl.mssql.configuration import MsSqlCredentials
+from dlt.destinations.impl.mssql import capabilities
 
 
 def handle_datetimeoffset(dto_value: bytes) -> datetime:
diff --git a/dlt/destinations/postgres/README.md b/dlt/destinations/impl/postgres/README.md
similarity index 100%
rename from dlt/destinations/postgres/README.md
rename to dlt/destinations/impl/postgres/README.md
diff --git a/dlt/destinations/postgres/__init__.py b/dlt/destinations/impl/postgres/__init__.py
similarity index 93%
rename from dlt/destinations/postgres/__init__.py
rename to dlt/destinations/impl/postgres/__init__.py
index e8904c075f..54bc3297b1 100644
--- a/dlt/destinations/postgres/__init__.py
+++ b/dlt/destinations/impl/postgres/__init__.py
@@ -9,7 +9,7 @@
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 from dlt.common.wei import EVM_DECIMAL_PRECISION
 
-from dlt.destinations.postgres.configuration import PostgresClientConfiguration
+from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration
 
 
 @with_config(spec=PostgresClientConfiguration, sections=(known_sections.DESTINATION, "postgres",))
@@ -41,7 +41,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.postgres.postgres import PostgresClient
+    from dlt.destinations.impl.postgres.postgres import PostgresClient
 
     return PostgresClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/postgres/configuration.py b/dlt/destinations/impl/postgres/configuration.py
similarity index 100%
rename from dlt/destinations/postgres/configuration.py
rename to dlt/destinations/impl/postgres/configuration.py
diff --git a/dlt/destinations/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py
similarity index 95%
rename from dlt/destinations/postgres/postgres.py
rename to dlt/destinations/impl/postgres/postgres.py
index 2812d1d4c4..03c42f4d75 100644
--- a/dlt/destinations/postgres/postgres.py
+++ b/dlt/destinations/impl/postgres/postgres.py
@@ -11,9 +11,9 @@
 
 from dlt.destinations.insert_job_client import InsertValuesJobClient
 
-from dlt.destinations.postgres import capabilities
-from dlt.destinations.postgres.sql_client import Psycopg2SqlClient
-from dlt.destinations.postgres.configuration import PostgresClientConfiguration
+from dlt.destinations.impl.postgres import capabilities
+from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient
+from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration
 from dlt.destinations.sql_client import SqlClientBase
 from dlt.destinations.type_mapping import TypeMapper
 
diff --git a/dlt/destinations/postgres/sql_client.py b/dlt/destinations/impl/postgres/sql_client.py
similarity index 97%
rename from dlt/destinations/postgres/sql_client.py
rename to dlt/destinations/impl/postgres/sql_client.py
index 079a0ae477..b6c4c1a1be 100644
--- a/dlt/destinations/postgres/sql_client.py
+++ b/dlt/destinations/impl/postgres/sql_client.py
@@ -16,8 +16,8 @@
 
 from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction
 from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error
-from dlt.destinations.postgres.configuration import PostgresCredentials
-from dlt.destinations.postgres import capabilities
+from dlt.destinations.impl.postgres.configuration import PostgresCredentials
+from dlt.destinations.impl.postgres import capabilities
 
 
 class Psycopg2SqlClient(SqlClientBase["psycopg2.connection"], DBTransaction):
diff --git a/dlt/destinations/qdrant/__init__.py b/dlt/destinations/impl/qdrant/__init__.py
similarity index 86%
rename from dlt/destinations/qdrant/__init__.py
rename to dlt/destinations/impl/qdrant/__init__.py
index 7a8619ffcd..4bdf7f6b9e 100644
--- a/dlt/destinations/qdrant/__init__.py
+++ b/dlt/destinations/impl/qdrant/__init__.py
@@ -8,9 +8,9 @@
     DestinationClientConfiguration,
 )
 from dlt.common.destination import DestinationCapabilitiesContext
-from dlt.destinations.qdrant.qdrant_adapter import qdrant_adapter
+from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter
 
-from dlt.destinations.qdrant.configuration import QdrantClientConfiguration
+from dlt.destinations.impl.qdrant.configuration import QdrantClientConfiguration
 
 
 @with_config(
@@ -45,7 +45,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 def client(
     schema: Schema, initial_config: DestinationClientConfiguration = config.value
 ) -> JobClientBase:
-    from dlt.destinations.qdrant.qdrant_client import QdrantClient
+    from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient
 
     return QdrantClient(schema, _configure(initial_config))  # type: ignore
 
diff --git a/dlt/destinations/qdrant/configuration.py b/dlt/destinations/impl/qdrant/configuration.py
similarity index 100%
rename from dlt/destinations/qdrant/configuration.py
rename to dlt/destinations/impl/qdrant/configuration.py
diff --git a/dlt/destinations/qdrant/qdrant_adapter.py b/dlt/destinations/impl/qdrant/qdrant_adapter.py
similarity index 100%
rename from dlt/destinations/qdrant/qdrant_adapter.py
rename to dlt/destinations/impl/qdrant/qdrant_adapter.py
diff --git a/dlt/destinations/qdrant/qdrant_client.py b/dlt/destinations/impl/qdrant/qdrant_client.py
similarity index 98%
rename from dlt/destinations/qdrant/qdrant_client.py
rename to dlt/destinations/impl/qdrant/qdrant_client.py
index cba87e9528..029530d624 100644
--- a/dlt/destinations/qdrant/qdrant_client.py
+++ b/dlt/destinations/impl/qdrant/qdrant_client.py
@@ -11,9 +11,9 @@
 
 from dlt.destinations.job_impl import EmptyLoadJob
 from dlt.destinations.job_client_impl import StorageSchemaInfo, StateInfo
-from dlt.destinations.qdrant import capabilities
-from dlt.destinations.qdrant.configuration import QdrantClientConfiguration
-from dlt.destinations.qdrant.qdrant_adapter import VECTORIZE_HINT
+from dlt.destinations.impl.qdrant import capabilities
+from dlt.destinations.impl.qdrant.configuration import QdrantClientConfiguration
+from dlt.destinations.impl.qdrant.qdrant_adapter import VECTORIZE_HINT
 
 from qdrant_client import QdrantClient as QC, models
 from qdrant_client.qdrant_fastembed import uuid
@@ -406,4 +406,4 @@ def _collection_exists(self, table_name: str, qualify_table_name: bool = True) -> bool:
         except UnexpectedResponse as e:
             if e.status_code == 404:
                 return False
-            raise e
\ No newline at end of file
+            raise e
diff --git a/dlt/destinations/redshift/README.md b/dlt/destinations/impl/redshift/README.md
similarity index 100%
rename from dlt/destinations/redshift/README.md
rename to dlt/destinations/impl/redshift/README.md
diff --git a/dlt/destinations/redshift/__init__.py b/dlt/destinations/impl/redshift/__init__.py
similarity index 93%
rename from dlt/destinations/redshift/__init__.py
rename to dlt/destinations/impl/redshift/__init__.py
index 96741e86cd..be5052b07b 100644
--- a/dlt/destinations/redshift/__init__.py
+++ b/dlt/destinations/impl/redshift/__init__.py
@@ -8,7 +8,7 @@
 from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 
-from dlt.destinations.redshift.configuration import RedshiftClientConfiguration
+from dlt.destinations.impl.redshift.configuration import RedshiftClientConfiguration
 
 
 @with_config(spec=RedshiftClientConfiguration, sections=(known_sections.DESTINATION, "redshift",))
@@ -40,7 +40,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.redshift.redshift import RedshiftClient
+    from dlt.destinations.impl.redshift.redshift import RedshiftClient
 
     return RedshiftClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/redshift/configuration.py b/dlt/destinations/impl/redshift/configuration.py
similarity index 88%
rename from dlt/destinations/redshift/configuration.py
rename to dlt/destinations/impl/redshift/configuration.py
index 7cb13b996f..7018445773 100644
--- a/dlt/destinations/redshift/configuration.py
+++ b/dlt/destinations/impl/redshift/configuration.py
@@ -4,7 +4,7 @@
 from dlt.common.configuration import configspec
 from dlt.common.utils import digest128
 
-from dlt.destinations.postgres.configuration import PostgresCredentials, PostgresClientConfiguration
+from dlt.destinations.impl.postgres.configuration import PostgresCredentials, PostgresClientConfiguration
 
 
 @configspec
diff --git a/dlt/destinations/redshift/redshift.py b/dlt/destinations/impl/redshift/redshift.py
similarity index 97%
rename from dlt/destinations/redshift/redshift.py
rename to dlt/destinations/impl/redshift/redshift.py
index 888f27ae7c..2124807bc1 100644
--- a/dlt/destinations/redshift/redshift.py
+++ b/dlt/destinations/impl/redshift/redshift.py
@@ -1,7 +1,7 @@
 import platform
 import os
 
-from dlt.destinations.postgres.sql_client import Psycopg2SqlClient
+from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient
 from dlt.common.schema.utils import table_schema_has_type, table_schema_has_type_with_precision
 
 if platform.python_implementation() == "PyPy":
@@ -25,8 +25,8 @@
 
 from dlt.destinations.exceptions import DatabaseTerminalException, LoadJobTerminalException
 from dlt.destinations.job_client_impl import CopyRemoteFileLoadJob, LoadJob
-from dlt.destinations.redshift import capabilities
-from dlt.destinations.redshift.configuration import RedshiftClientConfiguration
+from dlt.destinations.impl.redshift import capabilities
+from dlt.destinations.impl.redshift.configuration import RedshiftClientConfiguration
 from dlt.destinations.job_impl import NewReferenceJob
 from dlt.destinations.sql_client import SqlClientBase
 from dlt.destinations.type_mapping import TypeMapper
diff --git a/dlt/destinations/snowflake/__init__.py b/dlt/destinations/impl/snowflake/__init__.py
similarity index 92%
rename from dlt/destinations/snowflake/__init__.py
rename to dlt/destinations/impl/snowflake/__init__.py
index 5d32bc41fd..0cad57b309 100644
--- a/dlt/destinations/snowflake/__init__.py
+++ b/dlt/destinations/impl/snowflake/__init__.py
@@ -9,7 +9,7 @@
 from dlt.common.data_writers.escape import escape_snowflake_identifier
 from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
 
-from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration
+from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration
 
 
 @with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, "snowflake",))
@@ -39,7 +39,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 
 def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase:
     # import client when creating instance so capabilities and config specs can be accessed without dependencies installed
-    from dlt.destinations.snowflake.snowflake import SnowflakeClient
+    from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient
 
     return SnowflakeClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/snowflake/configuration.py b/dlt/destinations/impl/snowflake/configuration.py
similarity index 100%
rename from dlt/destinations/snowflake/configuration.py
rename to dlt/destinations/impl/snowflake/configuration.py
diff --git a/dlt/destinations/snowflake/snowflake.py b/dlt/destinations/impl/snowflake/snowflake.py
similarity index 97%
rename from dlt/destinations/snowflake/snowflake.py
rename to dlt/destinations/impl/snowflake/snowflake.py
index f433ec7e7d..ead3e810d2 100644
--- a/dlt/destinations/snowflake/snowflake.py
+++ b/dlt/destinations/impl/snowflake/snowflake.py
@@ -14,11 +14,11 @@
 
 from dlt.destinations.job_impl import EmptyLoadJob
 from dlt.destinations.exceptions import LoadJobTerminalException
-from dlt.destinations.snowflake import capabilities
-from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration
-from dlt.destinations.snowflake.sql_client import SnowflakeSqlClient
+from dlt.destinations.impl.snowflake import capabilities
+from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration
+from dlt.destinations.impl.snowflake.sql_client import SnowflakeSqlClient
 from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlJobParams
-from dlt.destinations.snowflake.sql_client import SnowflakeSqlClient
+from dlt.destinations.impl.snowflake.sql_client import SnowflakeSqlClient
 from dlt.destinations.job_impl import NewReferenceJob
 from dlt.destinations.sql_client import SqlClientBase
 from dlt.destinations.type_mapping import TypeMapper
diff --git a/dlt/destinations/snowflake/sql_client.py b/dlt/destinations/impl/snowflake/sql_client.py
similarity index 98%
rename from dlt/destinations/snowflake/sql_client.py
rename to dlt/destinations/impl/snowflake/sql_client.py
index 40cdc990a0..139a5ebb7a 100644
--- a/dlt/destinations/snowflake/sql_client.py
+++ b/dlt/destinations/impl/snowflake/sql_client.py
@@ -7,8 +7,8 @@
 from dlt.destinations.exceptions import DatabaseTerminalException, DatabaseTransientException, DatabaseUndefinedRelation
 from dlt.destinations.sql_client import DBApiCursorImpl, SqlClientBase, raise_database_error, raise_open_connection_error
 from dlt.destinations.typing import DBApi, DBApiCursor, DBTransaction, DataFrame
-from dlt.destinations.snowflake.configuration import SnowflakeCredentials
-from dlt.destinations.snowflake import capabilities
+from dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials
+from dlt.destinations.impl.snowflake import capabilities
 
 class SnowflakeCursorImpl(DBApiCursorImpl):
     native_cursor: snowflake_lib.cursor.SnowflakeCursor  # type: ignore[assignment]
diff --git a/dlt/destinations/weaviate/README.md b/dlt/destinations/impl/weaviate/README.md
similarity index 100%
rename from dlt/destinations/weaviate/README.md
rename to dlt/destinations/impl/weaviate/README.md
diff --git a/dlt/destinations/weaviate/__init__.py b/dlt/destinations/impl/weaviate/__init__.py
similarity index 86%
rename from dlt/destinations/weaviate/__init__.py
rename to dlt/destinations/impl/weaviate/__init__.py
index ebd87aea0c..36237702a0 100644
--- a/dlt/destinations/weaviate/__init__.py
+++ b/dlt/destinations/impl/weaviate/__init__.py
@@ -9,8 +9,8 @@
 )
 from dlt.common.destination import DestinationCapabilitiesContext
 
-from dlt.destinations.weaviate.weaviate_adapter import weaviate_adapter
-from dlt.destinations.weaviate.configuration import WeaviateClientConfiguration
+from dlt.destinations.impl.weaviate.weaviate_adapter import weaviate_adapter
+from dlt.destinations.impl.weaviate.configuration import WeaviateClientConfiguration
 
 
 @with_config(
@@ -46,7 +46,7 @@ def capabilities() -> DestinationCapabilitiesContext:
 def client(
     schema: Schema, initial_config: DestinationClientConfiguration = config.value
 ) -> JobClientBase:
-    from dlt.destinations.weaviate.weaviate_client import WeaviateClient
+    from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient
 
     return WeaviateClient(schema, _configure(initial_config))  # type: ignore
diff --git a/dlt/destinations/weaviate/ci_naming.py b/dlt/destinations/impl/weaviate/ci_naming.py
similarity index 100%
rename from dlt/destinations/weaviate/ci_naming.py
rename to dlt/destinations/impl/weaviate/ci_naming.py
diff --git a/dlt/destinations/weaviate/configuration.py b/dlt/destinations/impl/weaviate/configuration.py
similarity index 100%
rename from dlt/destinations/weaviate/configuration.py
rename to dlt/destinations/impl/weaviate/configuration.py
diff --git a/dlt/destinations/weaviate/exceptions.py b/dlt/destinations/impl/weaviate/exceptions.py
similarity index 100%
rename from dlt/destinations/weaviate/exceptions.py
rename to dlt/destinations/impl/weaviate/exceptions.py
diff --git a/dlt/destinations/weaviate/naming.py b/dlt/destinations/impl/weaviate/naming.py
similarity index 100%
rename from dlt/destinations/weaviate/naming.py
rename to dlt/destinations/impl/weaviate/naming.py
diff --git a/dlt/destinations/weaviate/weaviate_adapter.py b/dlt/destinations/impl/weaviate/weaviate_adapter.py
similarity index 100%
rename from dlt/destinations/weaviate/weaviate_adapter.py
rename to dlt/destinations/impl/weaviate/weaviate_adapter.py
diff --git a/dlt/destinations/weaviate/weaviate_client.py b/dlt/destinations/impl/weaviate/weaviate_client.py
similarity index 98%
rename from dlt/destinations/weaviate/weaviate_client.py
rename to dlt/destinations/impl/weaviate/weaviate_client.py
index d47f08ab59..099cdc7368 100644
--- a/dlt/destinations/weaviate/weaviate_client.py
+++ b/dlt/destinations/impl/weaviate/weaviate_client.py
@@ -41,13 +41,13 @@
 
 from dlt.common.data_types import TDataType
 from dlt.common.storages import FileStorage
 
-from dlt.destinations.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT
+from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT
 
 from dlt.destinations.job_impl import EmptyLoadJob
 from dlt.destinations.job_client_impl import StorageSchemaInfo, StateInfo
-from dlt.destinations.weaviate import capabilities
-from dlt.destinations.weaviate.configuration import WeaviateClientConfiguration
-from dlt.destinations.weaviate.exceptions import PropertyNameConflict, WeaviateBatchError
+from dlt.destinations.impl.weaviate import capabilities
+from dlt.destinations.impl.weaviate.configuration import WeaviateClientConfiguration
+from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict, WeaviateBatchError
 from dlt.destinations.type_mapping import TypeMapper
 
diff --git a/tests/common/data_writers/test_data_writers.py b/tests/common/data_writers/test_data_writers.py
index 66b8f765c7..9d655bc4db 100644
--- a/tests/common/data_writers/test_data_writers.py
+++ b/tests/common/data_writers/test_data_writers.py
@@ -5,7 +5,7 @@
 from dlt.common import pendulum, json
 from dlt.common.typing import AnyFun
 
 # from dlt.destinations.postgres import capabilities
-from dlt.destinations.redshift import capabilities as redshift_caps
+from dlt.destinations.impl.redshift import capabilities as redshift_caps
 from dlt.common.data_writers.escape import escape_redshift_identifier, escape_bigquery_identifier, escape_redshift_literal, escape_postgres_literal, escape_duckdb_literal
 
 from dlt.common.data_writers.writers import DataWriter, InsertValuesWriter, JsonlWriter, ParquetDataWriter
diff --git a/tests/helpers/dbt_tests/local/test_dbt_utils.py b/tests/helpers/dbt_tests/local/test_dbt_utils.py
index 71e570bd69..133ecf1617 100644
--- a/tests/helpers/dbt_tests/local/test_dbt_utils.py
+++ b/tests/helpers/dbt_tests/local/test_dbt_utils.py
@@ -7,7 +7,7 @@
 from dlt.common.storages import FileStorage
 from dlt.common.utils import uniq_id
 
-from dlt.destinations.postgres.configuration import PostgresCredentials
+from dlt.destinations.impl.postgres.configuration import PostgresCredentials
 from dlt.helpers.dbt.dbt_utils import DBTProcessingError, initialize_dbt_logging, run_dbt_command, is_incremental_schema_out_of_sync_error
 
 from tests.utils import test_storage, preserve_environ
diff --git a/tests/helpers/dbt_tests/test_runner_dbt_versions.py b/tests/helpers/dbt_tests/test_runner_dbt_versions.py
index b418bf15b6..b369c5e64c 100644
--- a/tests/helpers/dbt_tests/test_runner_dbt_versions.py
+++ b/tests/helpers/dbt_tests/test_runner_dbt_versions.py
@@ -14,8 +14,8 @@
 from dlt.common.runners.synth_pickle import decode_obj, encode_obj
 from dlt.common.typing import AnyFun
 
-from dlt.destinations.postgres.postgres import PostgresClient
-from dlt.destinations.bigquery import BigQueryClientConfiguration
+from dlt.destinations.impl.postgres.postgres import PostgresClient
+from dlt.destinations.impl.bigquery import BigQueryClientConfiguration
 from dlt.helpers.dbt.configuration import DBTRunnerConfiguration
 from dlt.helpers.dbt.exceptions import PrerequisitesException, DBTProcessingError
 from dlt.helpers.dbt import package_runner, create_venv, _create_dbt_deps, _default_profile_name, DEFAULT_DBT_VERSION
diff --git a/tests/load/bigquery/test_bigquery_client.py b/tests/load/bigquery/test_bigquery_client.py
index 145898cde3..9985147748 100644
--- a/tests/load/bigquery/test_bigquery_client.py
+++ b/tests/load/bigquery/test_bigquery_client.py
@@ -14,7 +14,7 @@
 from dlt.common.storages import FileStorage
 from dlt.common.utils import digest128, uniq_id, custom_environ
 
-from dlt.destinations.bigquery.bigquery import BigQueryClient, BigQueryClientConfiguration
+from dlt.destinations.impl.bigquery.bigquery import BigQueryClient, BigQueryClientConfiguration
 from dlt.destinations.exceptions import LoadJobNotExistsException, LoadJobTerminalException
 
 from tests.utils import TEST_STORAGE_ROOT, delete_test_storage, preserve_environ
diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py
index a3222ba020..0d8ab1c8c2 100644
--- a/tests/load/bigquery/test_bigquery_table_builder.py
+++ b/tests/load/bigquery/test_bigquery_table_builder.py
@@ -8,8 +8,8 @@
 from dlt.common.configuration import resolve_configuration
 from dlt.common.configuration.specs import GcpServiceAccountCredentialsWithoutDefaults
 
-from dlt.destinations.bigquery.bigquery import BigQueryClient
-from dlt.destinations.bigquery.configuration import BigQueryClientConfiguration
+from dlt.destinations.impl.bigquery.bigquery import BigQueryClient
+from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration
 from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate
 
 from tests.load.utils import TABLE_UPDATE
diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py
index 6c362a6b76..9d3faa3881 100644
--- a/tests/load/duckdb/test_duckdb_client.py
+++ b/tests/load/duckdb/test_duckdb_client.py
@@ -6,7 +6,7 @@
 from dlt.common.configuration.resolve import resolve_configuration
 from dlt.common.configuration.utils import get_resolved_traces
 
-from dlt.destinations.duckdb.configuration import DUCK_DB_NAME, DuckDbClientConfiguration, DuckDbCredentials, DEFAULT_DUCK_DB_NAME
+from dlt.destinations.impl.duckdb.configuration import DUCK_DB_NAME, DuckDbClientConfiguration, DuckDbCredentials, DEFAULT_DUCK_DB_NAME
 
 from tests.load.pipeline.utils import drop_pipeline, assert_table
 from tests.utils import patch_home_dir, autouse_test_storage, preserve_environ, TEST_STORAGE_ROOT
diff --git a/tests/load/duckdb/test_duckdb_table_builder.py b/tests/load/duckdb/test_duckdb_table_builder.py
index 247d134b06..a5870763fc 100644
--- a/tests/load/duckdb/test_duckdb_table_builder.py
+++ b/tests/load/duckdb/test_duckdb_table_builder.py
@@ -5,8 +5,8 @@
 from dlt.common.utils import uniq_id
 from dlt.common.schema import Schema
 
-from dlt.destinations.duckdb.duck import DuckDbClient
-from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration
+from dlt.destinations.impl.duckdb.duck import DuckDbClient
+from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration
 
 from tests.load.utils import TABLE_UPDATE
 
diff --git a/tests/load/duckdb/test_motherduck_client.py b/tests/load/duckdb/test_motherduck_client.py
index 4a167fa016..582847bfa2 100644
--- a/tests/load/duckdb/test_motherduck_client.py
+++ b/tests/load/duckdb/test_motherduck_client.py
@@ -3,7 +3,7 @@
 
 from dlt.common.configuration.resolve import resolve_configuration
 
-from dlt.destinations.motherduck.configuration import MotherDuckCredentials, MotherDuckClientConfiguration
+from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials, MotherDuckClientConfiguration
 
 from tests.utils import patch_home_dir, preserve_environ, skip_if_not_active
 
diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py
index f290892e18..0055f37716 100644
--- a/tests/load/filesystem/test_filesystem_client.py
+++ b/tests/load/filesystem/test_filesystem_client.py
@@ -6,7 +6,7 @@
 from dlt.common.utils import digest128, uniq_id
 from dlt.common.storages import LoadStorage, FileStorage
 
-from dlt.destinations.filesystem.filesystem import LoadFilesystemJob, FilesystemDestinationClientConfiguration
+from dlt.destinations.impl.filesystem.filesystem import LoadFilesystemJob, FilesystemDestinationClientConfiguration
 
 from tests.load.filesystem.utils import perform_load
 from tests.utils import clean_test_storage, init_test_logging
diff --git a/tests/load/filesystem/utils.py b/tests/load/filesystem/utils.py
index eebfa6e87c..a3cc56e0da 100644
--- a/tests/load/filesystem/utils.py
+++ b/tests/load/filesystem/utils.py
@@ -6,8 +6,8 @@
 from dlt.common.configuration.container import Container
 from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
 from dlt.common.destination.reference import DestinationReference, LoadJob
-from dlt.destinations import filesystem
-from dlt.destinations.filesystem.filesystem import FilesystemClient
+from dlt.destinations.impl import filesystem
+from dlt.destinations.impl.filesystem.filesystem import FilesystemClient
 from dlt.destinations.job_impl import EmptyLoadJob
 from tests.load.utils import prepare_load_package
 
diff --git a/tests/load/mssql/test_mssql_credentials.py b/tests/load/mssql/test_mssql_credentials.py
index 9b57692bb2..5428246247 100644
--- a/tests/load/mssql/test_mssql_credentials.py
+++ b/tests/load/mssql/test_mssql_credentials.py
@@ -1,6 +1,6 @@
 from dlt.common.configuration import resolve_configuration
 
-from dlt.destinations.mssql.configuration import MsSqlCredentials
+from dlt.destinations.impl.mssql.configuration import MsSqlCredentials
 
 
 
diff --git a/tests/load/mssql/test_mssql_table_builder.py b/tests/load/mssql/test_mssql_table_builder.py
index 4f5a6637d6..114d94a20f 100644
--- a/tests/load/mssql/test_mssql_table_builder.py
+++ b/tests/load/mssql/test_mssql_table_builder.py
@@ -7,8 +7,8 @@
 
 pytest.importorskip("dlt.destinations.mssql.mssql", reason="MSSQL ODBC driver not installed")
 
-from dlt.destinations.mssql.mssql import MsSqlClient
-from dlt.destinations.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials
+from dlt.destinations.impl.mssql.mssql import MsSqlClient
+from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials
 
 from tests.load.utils import TABLE_UPDATE
 
diff --git a/tests/load/pipeline/test_filesystem_pipeline.py b/tests/load/pipeline/test_filesystem_pipeline.py
index 8e810015f2..dce65bc8d7 100644
--- a/tests/load/pipeline/test_filesystem_pipeline.py
+++ b/tests/load/pipeline/test_filesystem_pipeline.py
@@ -4,7 +4,7 @@
 import dlt, os
 from dlt.common.utils import uniq_id
 from dlt.common.storages.load_storage import LoadJobInfo
-from dlt.destinations.filesystem.filesystem import FilesystemClient, LoadFilesystemJob
+from dlt.destinations.impl.filesystem.filesystem import FilesystemClient, LoadFilesystemJob
 from dlt.common.schema.typing import LOADS_TABLE_NAME
 
 from tests.utils import skip_if_not_active
diff --git a/tests/load/pipeline/utils.py b/tests/load/pipeline/utils.py
index 752571591c..7ed71fe27a 100644
--- a/tests/load/pipeline/utils.py
+++ b/tests/load/pipeline/utils.py
@@ -16,7 +16,7 @@
 from tests.load.utils import DestinationTestConfiguration, destinations_configs
 
 if TYPE_CHECKING:
-    from dlt.destinations.filesystem.filesystem import FilesystemClient
+    from dlt.destinations.impl.filesystem.filesystem import FilesystemClient
 
 @pytest.fixture(autouse=True)
 def drop_pipeline(request) -> Iterator[None]:
diff --git a/tests/load/postgres/test_postgres_client.py b/tests/load/postgres/test_postgres_client.py
index dcc242cf50..65ac61cfd4 100644
--- a/tests/load/postgres/test_postgres_client.py
+++ b/tests/load/postgres/test_postgres_client.py
@@ -7,9 +7,9 @@
 from dlt.common.storages import FileStorage
 from dlt.common.utils import uniq_id
 
-from dlt.destinations.postgres.configuration import PostgresCredentials
-from dlt.destinations.postgres.postgres import PostgresClient
-from dlt.destinations.postgres.sql_client import psycopg2
+from dlt.destinations.impl.postgres.configuration import PostgresCredentials
+from dlt.destinations.impl.postgres.postgres import PostgresClient
+from dlt.destinations.impl.postgres.sql_client import psycopg2
 
 from tests.utils import TEST_STORAGE_ROOT, delete_test_storage, skipifpypy, preserve_environ
 from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage
diff --git a/tests/load/postgres/test_postgres_table_builder.py b/tests/load/postgres/test_postgres_table_builder.py
index 165c62a468..1d6965c0c0 100644
--- a/tests/load/postgres/test_postgres_table_builder.py
+++ b/tests/load/postgres/test_postgres_table_builder.py
@@ -5,8 +5,8 @@
 from dlt.common.utils import uniq_id
 from dlt.common.schema import Schema
 
-from dlt.destinations.postgres.postgres import PostgresClient
-from dlt.destinations.postgres.configuration import PostgresClientConfiguration, PostgresCredentials
+from dlt.destinations.impl.postgres.postgres import PostgresClient
+from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration, PostgresCredentials
 
 from tests.load.utils import TABLE_UPDATE
 
diff --git a/tests/load/qdrant/test_pipeline.py b/tests/load/qdrant/test_pipeline.py
index 303a5de69f..760eec4631 100644
--- a/tests/load/qdrant/test_pipeline.py
+++ b/tests/load/qdrant/test_pipeline.py
@@ -5,8 +5,8 @@
 from dlt.common import json
 from dlt.common.utils import uniq_id
 
-from dlt.destinations.qdrant.qdrant_adapter import qdrant_adapter, VECTORIZE_HINT
-from dlt.destinations.qdrant.qdrant_client import QdrantClient
+from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter, VECTORIZE_HINT
+from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient
 
 from tests.pipeline.utils import assert_load_info
 from tests.load.qdrant.utils import drop_active_pipeline_data, assert_collection
diff --git a/tests/load/qdrant/utils.py b/tests/load/qdrant/utils.py
index 96b582a28e..1dfacbee7f 100644
--- a/tests/load/qdrant/utils.py
+++ b/tests/load/qdrant/utils.py
@@ -5,7 +5,7 @@
 from dlt.common.pipeline import PipelineContext
 from dlt.common.configuration.container import Container
 
-from dlt.destinations.qdrant.qdrant_client import QdrantClient
+from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient
 
 
 def assert_unordered_list_equal(list1: List[Any], list2: List[Any]) -> None:
diff --git a/tests/load/redshift/test_redshift_client.py b/tests/load/redshift/test_redshift_client.py
index 9839965b70..7f617024df 100644
--- a/tests/load/redshift/test_redshift_client.py
+++ b/tests/load/redshift/test_redshift_client.py
@@ -12,8 +12,8 @@
 from dlt.common.utils import uniq_id
 
 from dlt.destinations.exceptions import DatabaseTerminalException
-from dlt.destinations.redshift.configuration import RedshiftCredentials
-from dlt.destinations.redshift.redshift import RedshiftClient, psycopg2
+from dlt.destinations.impl.redshift.configuration import RedshiftCredentials
+from dlt.destinations.impl.redshift.redshift import RedshiftClient, psycopg2
 
 from tests.common.utils import COMMON_TEST_CASES_PATH
 from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage, skipifpypy
diff --git a/tests/load/redshift/test_redshift_table_builder.py b/tests/load/redshift/test_redshift_table_builder.py
index 8c61ccc1f2..2e0feb44e7 100644
--- a/tests/load/redshift/test_redshift_table_builder.py
+++ b/tests/load/redshift/test_redshift_table_builder.py
@@ -6,8 +6,8 @@
 from dlt.common.schema import Schema
 from dlt.common.configuration import resolve_configuration
 
-from dlt.destinations.redshift.redshift import RedshiftClient
-from dlt.destinations.redshift.configuration import RedshiftClientConfiguration, RedshiftCredentials
+from dlt.destinations.impl.redshift.redshift import RedshiftClient
+from dlt.destinations.impl.redshift.configuration import RedshiftClientConfiguration, RedshiftCredentials
 
 from tests.load.utils import TABLE_UPDATE
 
diff --git a/tests/load/snowflake/test_snowflake_configuration.py b/tests/load/snowflake/test_snowflake_configuration.py
index 7108ad06e5..abf80a1241 100644
--- a/tests/load/snowflake/test_snowflake_configuration.py
+++ b/tests/load/snowflake/test_snowflake_configuration.py
@@ -9,7 +9,7 @@
 from dlt.common.configuration.exceptions import ConfigurationValueError
 from dlt.common.utils import digest128
 
-from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration, SnowflakeCredentials
+from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration, SnowflakeCredentials
 
 from tests.common.configuration.utils import environment
 
diff --git a/tests/load/snowflake/test_snowflake_table_builder.py b/tests/load/snowflake/test_snowflake_table_builder.py
index 81164625f9..9ede1c8d13 100644
--- a/tests/load/snowflake/test_snowflake_table_builder.py
+++ b/tests/load/snowflake/test_snowflake_table_builder.py
@@ -5,8 +5,8 @@
 from dlt.common.utils import uniq_id
 from dlt.common.schema import Schema
 
-from dlt.destinations.snowflake.snowflake import SnowflakeClient
-from dlt.destinations.snowflake.configuration import SnowflakeClientConfiguration, SnowflakeCredentials
+from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient
+from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration, SnowflakeCredentials
 from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate
 
 from tests.load.utils import TABLE_UPDATE
diff --git a/tests/load/test_dummy_client.py b/tests/load/test_dummy_client.py
index e7e0166177..bb4b76c0b7 100644
--- a/tests/load/test_dummy_client.py
+++ b/tests/load/test_dummy_client.py
@@ -16,9 +16,9 @@
 from dlt.load import Load
 from dlt.destinations.job_impl import EmptyLoadJob
 
-from dlt.destinations import dummy
-from dlt.destinations.dummy import dummy as dummy_impl
-from dlt.destinations.dummy.configuration import DummyClientConfiguration
+from dlt.destinations.impl import dummy
+from dlt.destinations.impl.dummy import dummy as dummy_impl
+from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration
 from dlt.load.exceptions import LoadClientJobFailed, LoadClientJobRetry
 from dlt.common.schema.utils import get_top_level_table
 
diff --git a/tests/load/test_insert_job_client.py b/tests/load/test_insert_job_client.py
index 95e63a79f2..86049b035a 100644
--- a/tests/load/test_insert_job_client.py
+++ b/tests/load/test_insert_job_client.py
@@ -52,7 +52,7 @@ def test_simple_load(client: InsertValuesJobClient, file_storage: FileStorage) -> None:
 def test_loading_errors(client: InsertValuesJobClient, file_storage: FileStorage) -> None:
     # test expected dbiapi exceptions for supported destinations
     import duckdb
-    from dlt.destinations.postgres.sql_client import psycopg2
+    from dlt.destinations.impl.postgres.sql_client import psycopg2
 
     TNotNullViolation = psycopg2.errors.NotNullViolation
     TNumericValueOutOfRange = psycopg2.errors.NumericValueOutOfRange
diff --git a/tests/load/utils.py b/tests/load/utils.py
index be2097c879..f8680b3885 100644
--- a/tests/load/utils.py
+++ b/tests/load/utils.py
@@ -229,7 +229,7 @@ def yield_client(
 ) -> Iterator[SqlJobClientBase]:
     os.environ.pop("DATASET_NAME", None)
     # import destination reference by name
-    destination = import_module(f"dlt.destinations.{destination_name}")
+    destination = import_module(f"dlt.destinations.impl.{destination_name}")
     # create initial config
     dest_config: DestinationClientDwhConfiguration = None
     dest_config = destination.spec()()
diff --git a/tests/load/weaviate/test_naming.py b/tests/load/weaviate/test_naming.py
index a965201425..25258a2479 100644
--- a/tests/load/weaviate/test_naming.py
+++ 
b/tests/load/weaviate/test_naming.py @@ -1,7 +1,7 @@ import dlt, pytest -from dlt.destinations.weaviate.naming import NamingConvention -from dlt.destinations.weaviate.ci_naming import NamingConvention as CINamingConvention +from dlt.destinations.impl.weaviate.naming import NamingConvention +from dlt.destinations.impl.weaviate.ci_naming import NamingConvention as CINamingConvention from tests.common.utils import load_yml_case diff --git a/tests/load/weaviate/test_pipeline.py b/tests/load/weaviate/test_pipeline.py index 339c94575e..a6376ba1bc 100644 --- a/tests/load/weaviate/test_pipeline.py +++ b/tests/load/weaviate/test_pipeline.py @@ -6,10 +6,10 @@ from dlt.common.schema import Schema from dlt.common.utils import uniq_id -from dlt.destinations.weaviate import weaviate_adapter -from dlt.destinations.weaviate.exceptions import PropertyNameConflict -from dlt.destinations.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT -from dlt.destinations.weaviate.weaviate_client import WeaviateClient +from dlt.destinations.impl.weaviate import weaviate_adapter +from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict +from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT +from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient from dlt.pipeline.exceptions import PipelineStepFailed from tests.pipeline.utils import assert_load_info diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py index d102610f68..3ae739c90c 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -8,9 +8,9 @@ from dlt.common.utils import uniq_id from dlt.common.schema.typing import TWriteDisposition, TColumnSchema, TTableSchemaColumns -from dlt.destinations import weaviate -from dlt.destinations.weaviate.exceptions import PropertyNameConflict -from dlt.destinations.weaviate.weaviate_client import WeaviateClient +from dlt.destinations.impl import weaviate +from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict +from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient from dlt.common.storages.file_storage import FileStorage from dlt.common.schema.utils import new_table diff --git a/tests/load/weaviate/utils.py b/tests/load/weaviate/utils.py index d5568b0598..ed378191e6 100644 --- a/tests/load/weaviate/utils.py +++ b/tests/load/weaviate/utils.py @@ -6,8 +6,8 @@ from dlt.common.configuration.container import Container from dlt.common.schema.utils import get_columns_names_with_prop -from dlt.destinations.weaviate.weaviate_client import WeaviateClient -from dlt.destinations.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT +from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient +from dlt.destinations.impl.weaviate.weaviate_adapter import VECTORIZE_HINT, TOKENIZATION_HINT def assert_unordered_list_equal(list1: List[Any], list2: List[Any]) -> None: diff --git a/tests/normalize/utils.py b/tests/normalize/utils.py index 3ee14948c1..0ce099d4b6 100644 --- a/tests/normalize/utils.py +++ b/tests/normalize/utils.py @@ -1,10 +1,10 @@ from typing import Mapping, cast -from dlt.destinations.duckdb import capabilities as duck_insert_caps -from dlt.destinations.redshift import capabilities as rd_insert_caps -from dlt.destinations.postgres import capabilities as pg_insert_caps -from dlt.destinations.bigquery import capabilities as jsonl_caps -from dlt.destinations.filesystem import 
capabilities as filesystem_caps +from dlt.destinations.impl.duckdb import capabilities as duck_insert_caps +from dlt.destinations.impl.redshift import capabilities as rd_insert_caps +from dlt.destinations.impl.postgres import capabilities as pg_insert_caps +from dlt.destinations.impl.bigquery import capabilities as jsonl_caps +from dlt.destinations.impl.filesystem import capabilities as filesystem_caps DEFAULT_CAPS = pg_insert_caps diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index 09d8e98d82..2f383c1c0a 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -10,8 +10,8 @@ from dlt.common.storages import FileStorage from dlt.common.schema.typing import LOADS_TABLE_NAME, VERSION_TABLE_NAME, TStoredSchema from dlt.common.configuration.resolve import resolve_configuration -from dlt.destinations.duckdb.configuration import DuckDbClientConfiguration -from dlt.destinations.duckdb.sql_client import DuckDbSqlClient +from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration +from dlt.destinations.impl.duckdb.sql_client import DuckDbSqlClient from tests.utils import TEST_STORAGE_ROOT, test_storage diff --git a/tests/tools/clean_redshift.py b/tests/tools/clean_redshift.py index 7444d69685..27680b26cd 100644 --- a/tests/tools/clean_redshift.py +++ b/tests/tools/clean_redshift.py @@ -1,5 +1,5 @@ -from dlt.destinations.postgres.postgres import PostgresClient -from dlt.destinations.postgres.sql_client import psycopg2 +from dlt.destinations.impl.postgres.postgres import PostgresClient +from dlt.destinations.impl.postgres.sql_client import psycopg2 from psycopg2.errors import InsufficientPrivilege, InternalError_, SyntaxError CONNECTION_STRING = "" From 3b5386107587c97117d2fee984d82867ef2fd9fd Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 8 Nov 2023 21:59:30 -0500 Subject: [PATCH 02/29] Mockup destination factory --- dlt/common/configuration/inject.py | 11 +++--- dlt/common/destination/reference.py | 37 ++++++++++++++++++++- dlt/destinations/__init__.py | 10 ++++++ dlt/destinations/impl/filesystem/factory.py | 26 +++++++++++++++ dlt/destinations/impl/postgres/factory.py | 25 ++++++++++++++ dlt/destinations/impl/snowflake/factory.py | 26 +++++++++++++++ 6 files changed, 130 insertions(+), 5 deletions(-) create mode 100644 dlt/destinations/impl/filesystem/factory.py create mode 100644 dlt/destinations/impl/postgres/factory.py create mode 100644 dlt/destinations/impl/snowflake/factory.py diff --git a/dlt/common/configuration/inject.py b/dlt/common/configuration/inject.py index 1880727a0f..4e214695f2 100644 --- a/dlt/common/configuration/inject.py +++ b/dlt/common/configuration/inject.py @@ -32,7 +32,8 @@ def with_config( sections: Tuple[str, ...] = (), sections_merge_style: ConfigSectionContext.TMergeFunc = ConfigSectionContext.prefer_incoming, auto_pipeline_section: bool = False, - include_defaults: bool = True + include_defaults: bool = True, + accept_partial: bool = False, ) -> TFun: ... @@ -45,7 +46,8 @@ def with_config( sections: Tuple[str, ...] = (), sections_merge_style: ConfigSectionContext.TMergeFunc = ConfigSectionContext.prefer_incoming, auto_pipeline_section: bool = False, - include_defaults: bool = True + include_defaults: bool = True, + accept_partial: bool = False, ) -> Callable[[TFun], TFun]: ... @@ -57,7 +59,8 @@ def with_config( sections: Tuple[str, ...] 
= (), sections_merge_style: ConfigSectionContext.TMergeFunc = ConfigSectionContext.prefer_incoming, auto_pipeline_section: bool = False, - include_defaults: bool = True + include_defaults: bool = True, + accept_partial: bool = False, ) -> Callable[[TFun], TFun]: """Injects values into decorated function arguments following the specification in `spec` or by deriving one from function's signature. @@ -139,7 +142,7 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: with _RESOLVE_LOCK: with inject_section(section_context): # print(f"RESOLVE CONF in inject: {f.__name__}: {section_context.sections} vs {sections}") - config = resolve_configuration(config or SPEC(), explicit_value=bound_args.arguments) + config = resolve_configuration(config or SPEC(), explicit_value=bound_args.arguments, accept_partial=accept_partial) resolved_params = dict(config) # overwrite or add resolved params for p in sig.parameters.values(): diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index ded654e965..cb6e02c8db 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -344,7 +344,7 @@ def should_truncate_table_before_load_on_staging_destination(self, table: TTable # the default is to truncate the tables on the staging destination... return True -TDestinationReferenceArg = Union["DestinationReference", ModuleType, None, str] +TDestinationReferenceArg = Union["DestinationReference", ModuleType, None, str, "DestinationFactory"] class DestinationReference(Protocol): @@ -397,6 +397,41 @@ def from_name(destination: TDestinationReferenceArg) -> "DestinationReference": @staticmethod def to_name(destination: TDestinationReferenceArg) -> str: + if isinstance(destination, DestinationFactory): + return destination.__name__ if isinstance(destination, ModuleType): return get_module_name(destination) return destination.split(".")[-1] # type: ignore + + +class DestinationFactory(ABC): + """A destination factory that can be partially pre-configured + with credentials and other config params. + """ + credentials: Optional[CredentialsConfiguration] = None + config_params: Optional[Dict[str, Any]] = None + + @property + @abstractmethod + def destination(self) -> DestinationReference: + """Returns the destination module""" + ... 
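The `accept_partial` flag threaded through `with_config` above is what makes a partially pre-configured factory possible: `resolve_configuration` may now return a spec whose required fields (typically credentials) are still unresolved, leaving them to be filled from config providers when the pipeline actually runs. A minimal sketch of the intended call pattern, assuming the `postgres` factory defined later in this patch and the extended `TDestinationReferenceArg` union:

    import dlt
    from dlt.destinations import postgres

    # Partially configure the destination. accept_partial=True lets this
    # succeed even though credentials are not supplied here; they are
    # resolved from secrets.toml or environment variables at run time.
    dest = postgres(create_indexes=False)

    # The factory instance is a valid TDestinationReferenceArg.
    pipeline = dlt.pipeline("demo_pipeline", destination=dest)
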
+ + @property + def __name__(self) -> str: + return self.destination.__name__ + + def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": + # TODO: Raise error somewhere if both DestinationFactory and credentials argument are used together in pipeline + cfg = initial_config.copy() + for key, value in self.config_params.items(): + setattr(cfg, key, value) + if self.credentials: + cfg.credentials = self.credentials + return self.destination.client(schema, cfg) + + def capabilities(self) -> DestinationCapabilitiesContext: + return self.destination.capabilities() + + def spec(self) -> Type[DestinationClientConfiguration]: + return self.destination.spec() diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index e69de29bb2..cd8d0dc265 100644 --- a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -0,0 +1,10 @@ +from dlt.destinations.impl.postgres.factory import postgres +from dlt.destinations.impl.snowflake.factory import snowflake +from dlt.destinations.impl.filesystem.factory import filesystem + + +__all__ = [ + "postgres", + "snowflake", + "filesystem", +] diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py new file mode 100644 index 0000000000..2e49c8a6f1 --- /dev/null +++ b/dlt/destinations/impl/filesystem/factory.py @@ -0,0 +1,26 @@ +import typing as t + +from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration +from dlt.destinations.impl import filesystem as _filesystem +from dlt.common.configuration import with_config, known_sections +from dlt.common.destination.reference import DestinationClientConfiguration, DestinationFactory +from dlt.common.storages.configuration import FileSystemCredentials + + +class filesystem(DestinationFactory): + + destination = _filesystem + + @with_config(spec=FilesystemDestinationClientConfiguration, sections=(known_sections.DESTINATION, 'filesystem'), accept_partial=True) + def __init__( + self, + bucket_url: str = None, + credentials: FileSystemCredentials = None, + **kwargs: t.Any, + ) -> None: + cfg: FilesystemDestinationClientConfiguration = kwargs['_dlt_config'] + self.credentials = cfg.credentials + self.config_params = { + "credentials": cfg.credentials, + "bucket_url": cfg.bucket_url, + } diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py new file mode 100644 index 0000000000..eb686a1216 --- /dev/null +++ b/dlt/destinations/impl/postgres/factory.py @@ -0,0 +1,25 @@ +import typing as t + +from dlt.common.configuration import with_config, known_sections +from dlt.common.destination.reference import DestinationClientConfiguration, DestinationFactory + +from dlt.destinations.impl.postgres.configuration import PostgresCredentials, PostgresClientConfiguration +from dlt.destinations.impl import postgres as _postgres + + +class postgres(DestinationFactory): + + destination = _postgres + + @with_config(spec=PostgresClientConfiguration, sections=(known_sections.DESTINATION, 'postgres'), accept_partial=True) + def __init__( + self, + credentials: PostgresCredentials = None, + create_indexes: bool = True, + **kwargs: t.Any, + ) -> None: + cfg: PostgresClientConfiguration = kwargs['_dlt_config'] + self.credentials = cfg.credentials + self.config_params = { + "created_indexes": cfg.create_indexes, + } diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py new file mode 100644 
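The `client()` method above defines the precedence rule for this mockup: parameters captured at factory construction time are written over the initially resolved config, and explicit credentials win last. A self-contained sketch of that merge, with a stub standing in for the real configuration class:

    from copy import copy

    class StubConfig:
        # stand-in for a DestinationClientConfiguration spec
        create_indexes = True
        credentials = None

    def merge(initial, config_params, credentials):
        # mirrors DestinationFactory.client(): factory params overwrite the
        # resolved config, then explicit credentials overwrite everything
        cfg = copy(initial)
        for key, value in config_params.items():
            setattr(cfg, key, value)
        if credentials:
            cfg.credentials = credentials
        return cfg

    cfg = merge(StubConfig(), {"create_indexes": False}, "postgresql://...")
    assert cfg.create_indexes is False
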
index 0000000000..c1bc915704 --- /dev/null +++ b/dlt/destinations/impl/snowflake/factory.py @@ -0,0 +1,26 @@ +import typing as t + +from dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials, SnowflakeClientConfiguration +from dlt.destinations.impl import snowflake as _snowflake +from dlt.common.configuration import with_config, known_sections +from dlt.common.destination.reference import DestinationClientConfiguration, DestinationFactory + + +class snowflake(DestinationFactory): + + destination = _snowflake + + @with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, 'snowflake'), accept_partial=True) + def __init__( + self, + credentials: SnowflakeCredentials = None, + stage_name: t.Optional[str] = None, + keep_staged_files: bool = True, + **kwargs: t.Any, + ) -> None: + cfg: SnowflakeClientConfiguration = kwargs['_dlt_config'] + self.credentials = cfg.credentials + self.config_params = { + "stage_name": cfg.stage_name, + "keep_staged_files": cfg.keep_staged_files, + } From 4b449e3ba71f0a66223b038f39dad25e99e02ebf Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 16:30:53 -0500 Subject: [PATCH 03/29] Destination factory replacing reference and dest __init__ --- dlt/cli/deploy_command.py | 4 +- dlt/cli/init_command.py | 6 +- dlt/cli/pipeline_command.py | 2 +- dlt/common/destination/__init__.py | 3 +- dlt/common/destination/reference.py | 192 ++++++++++++-------- dlt/common/pipeline.py | 4 +- dlt/destinations/__init__.py | 2 + dlt/destinations/impl/duckdb/factory.py | 31 ++++ dlt/destinations/impl/filesystem/factory.py | 25 ++- dlt/destinations/impl/postgres/factory.py | 25 ++- dlt/destinations/impl/snowflake/__init__.py | 18 +- dlt/destinations/impl/snowflake/factory.py | 26 ++- dlt/helpers/streamlit_helper.py | 2 +- dlt/load/load.py | 12 +- dlt/pipeline/__init__.py | 8 +- dlt/pipeline/pipeline.py | 63 ++++--- dlt/pipeline/track.py | 8 +- tests/common/test_destination.py | 15 +- tests/load/filesystem/utils.py | 8 +- tests/load/pipeline/test_arrow_loading.py | 2 +- tests/pipeline/test_pipeline.py | 2 +- tests/pipeline/test_pipeline_state.py | 4 +- 22 files changed, 281 insertions(+), 181 deletions(-) create mode 100644 dlt/destinations/impl/duckdb/factory.py diff --git a/dlt/cli/deploy_command.py b/dlt/cli/deploy_command.py index 7634f173b3..a7bdf2e0e7 100644 --- a/dlt/cli/deploy_command.py +++ b/dlt/cli/deploy_command.py @@ -16,7 +16,7 @@ from dlt.version import DLT_PKG_NAME -from dlt.common.destination.reference import DestinationReference +from dlt.common.destination.reference import Destination REQUIREMENTS_GITHUB_ACTION = "requirements_github_action.txt" DLT_DEPLOY_DOCS_URL = "https://dlthub.com/docs/walkthroughs/deploy-a-pipeline" @@ -198,7 +198,7 @@ def __init__( def _generate_workflow(self, *args: Optional[Any]) -> None: self.deployment_method = DeploymentMethods.airflow_composer.value - req_dep = f"{DLT_PKG_NAME}[{DestinationReference.to_name(self.state['destination'])}]" + req_dep = f"{DLT_PKG_NAME}[{Destination.to_name(self.state['destination'])}]" req_dep_line = f"{req_dep}>={pkg_version(DLT_PKG_NAME)}" self.artifacts["requirements_txt"] = req_dep_line diff --git a/dlt/cli/init_command.py b/dlt/cli/init_command.py index c246ac87de..4cec1706b9 100644 --- a/dlt/cli/init_command.py +++ b/dlt/cli/init_command.py @@ -12,7 +12,7 @@ from dlt.common.pipeline import get_dlt_repos_dir from dlt.common.source import _SOURCES from dlt.version import DLT_PKG_NAME, __version__ -from dlt.common.destination import 
DestinationReference +from dlt.common.destination import Destination from dlt.common.reflection.utils import rewrite_python_script from dlt.common.schema.utils import is_valid_schema_name from dlt.common.schema.exceptions import InvalidSchemaName @@ -160,8 +160,8 @@ def list_verified_sources_command(repo_location: str, branch: str = None) -> Non def init_command(source_name: str, destination_name: str, use_generic_template: bool, repo_location: str, branch: str = None) -> None: # try to import the destination and get config spec - destination_reference = DestinationReference.from_name(destination_name) - destination_spec = destination_reference.spec() + destination_reference = Destination.from_reference(destination_name) + destination_spec = destination_reference.spec fmt.echo("Looking up the init scripts in %s..." % fmt.bold(repo_location)) clone_storage = git.get_fresh_repo_files(repo_location, get_dlt_repos_dir(), branch=branch) diff --git a/dlt/cli/pipeline_command.py b/dlt/cli/pipeline_command.py index 52a9c8ffdc..80574f964f 100644 --- a/dlt/cli/pipeline_command.py +++ b/dlt/cli/pipeline_command.py @@ -196,7 +196,7 @@ def pipeline_command(operation: str, pipeline_name: str, pipelines_dir: str, ver fmt.warning(warning) return - fmt.echo("About to drop the following data in dataset %s in destination %s:" % (fmt.bold(drop.info["dataset_name"]), fmt.bold(p.destination.__name__))) + fmt.echo("About to drop the following data in dataset %s in destination %s:" % (fmt.bold(drop.info["dataset_name"]), fmt.bold(p.destination.name))) fmt.echo("%s: %s" % (fmt.style("Selected schema", fg="green"), drop.info["schema_name"])) fmt.echo("%s: %s" % (fmt.style("Selected resource(s)", fg="green"), drop.info["resource_names"])) fmt.echo("%s: %s" % (fmt.style("Table(s) to drop", fg="green"), drop.info["tables"])) diff --git a/dlt/common/destination/__init__.py b/dlt/common/destination/__init__.py index 88b5d5ef06..f0b22fa67a 100644 --- a/dlt/common/destination/__init__.py +++ b/dlt/common/destination/__init__.py @@ -1,10 +1,9 @@ from dlt.common.destination.capabilities import DestinationCapabilitiesContext, TLoaderFileFormat, ALL_SUPPORTED_FILE_FORMATS -from dlt.common.destination.reference import DestinationReference, TDestinationReferenceArg +from dlt.common.destination.reference import TDestinationReferenceArg, Destination __all__ = [ "DestinationCapabilitiesContext", "TLoaderFileFormat", "ALL_SUPPORTED_FILE_FORMATS", - "DestinationReference", "TDestinationReferenceArg", ] diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index cb6e02c8db..dd7af6e586 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod, abstractproperty from importlib import import_module from types import TracebackType, ModuleType -from typing import ClassVar, Final, Optional, NamedTuple, Literal, Sequence, Iterable, Type, Protocol, Union, TYPE_CHECKING, cast, List, ContextManager, Dict, Any +from typing import ClassVar, Final, Optional, NamedTuple, Literal, Sequence, Iterable, Type, Protocol, Union, TYPE_CHECKING, cast, List, ContextManager, Dict, Any, Callable from contextlib import contextmanager import datetime # noqa: 251 from copy import deepcopy @@ -23,6 +23,7 @@ from dlt.common.utils import get_module_name from dlt.common.configuration.specs import GcpCredentials, AwsCredentialsWithoutDefaults + TLoaderReplaceStrategy = Literal["truncate-and-insert", "insert-from-staging", "staging-optimized"] @@ 
-344,94 +345,137 @@ def should_truncate_table_before_load_on_staging_destination(self, table: TTable # the default is to truncate the tables on the staging destination... return True -TDestinationReferenceArg = Union["DestinationReference", ModuleType, None, str, "DestinationFactory"] +TDestinationReferenceArg = Union[str, "Destination", None] + + +# class DestinationReference(Protocol): +# __name__: str +# """Name of the destination""" + +# def capabilities(self) -> DestinationCapabilitiesContext: +# """Destination capabilities ie. supported loader file formats, identifier name lengths, naming conventions, escape function etc.""" + +# def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": +# """A job client responsible for starting and resuming load jobs""" + +# def spec(self) -> Type[DestinationClientConfiguration]: +# """A spec of destination configuration that also contains destination credentials""" + +# @staticmethod +# def from_name(destination: TDestinationReferenceArg) -> "DestinationReference": +# if destination is None: +# return None + +# # if destination is a str, get destination reference by dynamically importing module +# if isinstance(destination, str): +# try: +# if "." in destination: +# # this is full module name +# destination_ref = cast(DestinationReference, import_module(destination)) +# else: +# # from known location +# destination_ref = cast(DestinationReference, import_module(f"dlt.destinations.impl.{destination}")) +# except ImportError: +# if "." in destination: +# raise UnknownDestinationModule(destination) +# else: +# # allow local external module imported without dot +# try: +# destination_ref = cast(DestinationReference, import_module(destination)) +# except ImportError: +# raise UnknownDestinationModule(destination) +# else: +# destination_ref = cast(DestinationReference, destination) + +# # make sure the reference is correct +# try: +# c = destination_ref.spec() +# c.credentials +# except Exception: +# raise InvalidDestinationReference(destination) + +# return destination_ref + +# @staticmethod +# def to_name(destination: TDestinationReferenceArg) -> str: +# if isinstance(destination, DestinationFactory): +# return destination.name +# if isinstance(destination, ModuleType): +# return get_module_name(destination) +# return destination.split(".")[-1] # type: ignore + + +class Destination(ABC): + """A destination factory that can be partially pre-configured + with credentials and other config params. + """ + credentials: Optional[CredentialsConfiguration] = None + config_params: Optional[Dict[str, Any]] = None + def __init__(self, cfg: DestinationClientConfiguration) -> None: + cfg_dict = dict(cfg) + self.credentials = cfg_dict.pop("credentials", None) + self.config_params = {key: val for key, val in cfg_dict.items() if val is not None} -class DestinationReference(Protocol): - __name__: str - """Name of the destination""" + @property + @abstractmethod + def spec(self) -> Type[DestinationClientConfiguration]: + """Returns the destination configuration spec""" + ... + @property + @abstractmethod def capabilities(self) -> DestinationCapabilitiesContext: - """Destination capabilities ie. supported loader file formats, identifier name lengths, naming conventions, escape function etc.""" + """Returns the destination capabilities""" + ... 
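The new base `__init__` above reduces any resolved spec to two pieces of state: the credentials, and a dict of every other field that was explicitly set. Because specs iterate as mappings, `dict(cfg)` flattens them; the toy dict below stands in for that result (field names are illustrative):

    # what dict(cfg) might yield for a partially resolved spec
    cfg_dict = {"credentials": None, "create_indexes": None, "stage_name": "stg"}

    credentials = cfg_dict.pop("credentials", None)
    # None means "not explicitly configured"; dropping such fields keeps them
    # from clobbering values resolved later in Destination.client()
    config_params = {key: val for key, val in cfg_dict.items() if val is not None}

    assert config_params == {"stage_name": "stg"}
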
- def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": - """A job client responsible for starting and resuming load jobs""" + @property + def name(self) -> str: + return self.__class__.__name__ - def spec(self) -> Type[DestinationClientConfiguration]: - """A spec of destination configuration that also contains destination credentials""" + @property + @abstractmethod + def client_class(self) -> Type[JobClientBase]: + """Returns the client class""" + ... @staticmethod - def from_name(destination: TDestinationReferenceArg) -> "DestinationReference": - if destination is None: - return None - - # if destination is a str, get destination reference by dynamically importing module - if isinstance(destination, str): - try: - if "." in destination: - # this is full module name - destination_ref = cast(DestinationReference, import_module(destination)) - else: - # from known location - destination_ref = cast(DestinationReference, import_module(f"dlt.destinations.impl.{destination}")) - except ImportError: - if "." in destination: - raise UnknownDestinationModule(destination) - else: - # allow local external module imported without dot - try: - destination_ref = cast(DestinationReference, import_module(destination)) - except ImportError: - raise UnknownDestinationModule(destination) - else: - destination_ref = cast(DestinationReference, destination) - - # make sure the reference is correct - try: - c = destination_ref.spec() - c.credentials - except Exception: - raise InvalidDestinationReference(destination) - - return destination_ref + def to_name(ref: TDestinationReferenceArg) -> str: + if not ref: + raise InvalidDestinationReference(ref) + if isinstance(ref, str): + return ref.rsplit(".", 1)[-1] + return ref.name @staticmethod - def to_name(destination: TDestinationReferenceArg) -> str: - if isinstance(destination, DestinationFactory): - return destination.__name__ - if isinstance(destination, ModuleType): - return get_module_name(destination) - return destination.split(".")[-1] # type: ignore - - -class DestinationFactory(ABC): - """A destination factory that can be partially pre-configured - with credentials and other config params. - """ - credentials: Optional[CredentialsConfiguration] = None - config_params: Optional[Dict[str, Any]] = None + def from_reference(ref: TDestinationReferenceArg, *args, **kwargs) -> "Destination": + if isinstance(ref, Destination): + return ref + if not isinstance(ref, str): + raise InvalidDestinationReference(ref) + try: + if "." in ref: + module_path, attr_name = ref.rsplit(".", 1) + dest_module = import_module(module_path) + else: + from dlt import destinations as dest_module + attr_name = ref + except ImportError as e: + raise UnknownDestinationModule(ref) from e - @property - @abstractmethod - def destination(self) -> DestinationReference: - """Returns the destination module""" - ... 
+ try: + factory: Type[Destination] = getattr(dest_module, attr_name) + except AttributeError as e: + raise UnknownDestinationModule(ref) from e + return factory(*args, **kwargs) - @property - def __name__(self) -> str: - return self.destination.__name__ def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": # TODO: Raise error somewhere if both DestinationFactory and credentials argument are used together in pipeline cfg = initial_config.copy() - for key, value in self.config_params.items(): - setattr(cfg, key, value) + cfg.update(self.config_params) + # for key, value in self.config_params.items(): + # setattr(cfg, key, value) if self.credentials: cfg.credentials = self.credentials - return self.destination.client(schema, cfg) - - def capabilities(self) -> DestinationCapabilitiesContext: - return self.destination.capabilities() - - def spec(self) -> Type[DestinationClientConfiguration]: - return self.destination.spec() + return self.client_class(schema, cfg) diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index aeb0bdc68a..515ae9c3cf 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -14,7 +14,7 @@ from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.configuration.paths import get_dlt_data_dir from dlt.common.configuration.specs import RunConfiguration -from dlt.common.destination import DestinationReference, TDestinationReferenceArg +from dlt.common.destination import Destination, TDestinationReferenceArg from dlt.common.exceptions import DestinationHasFailedJobs, PipelineStateNotAvailable, ResourceNameNotAvailable, SourceSectionNotAvailable from dlt.common.schema import Schema from dlt.common.schema.typing import TColumnNames, TColumnSchema, TWriteDisposition @@ -177,7 +177,7 @@ class SupportsPipeline(Protocol): """Name of the pipeline""" default_schema_name: str """Name of the default schema""" - destination: DestinationReference + destination: Destination """The destination reference which is ModuleType. 
`destination.__name__` returns the name string""" dataset_name: str """Name of the dataset to which pipeline will be loaded to""" diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index cd8d0dc265..0abba830ab 100644 --- a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -1,10 +1,12 @@ from dlt.destinations.impl.postgres.factory import postgres from dlt.destinations.impl.snowflake.factory import snowflake from dlt.destinations.impl.filesystem.factory import filesystem +from dlt.destinations.impl.duckdb.factory import duckdb __all__ = [ "postgres", "snowflake", "filesystem", + "duckdb", ] diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py new file mode 100644 index 0000000000..656e866c54 --- /dev/null +++ b/dlt/destinations/impl/duckdb/factory.py @@ -0,0 +1,31 @@ +import typing as t + +from dlt.common.configuration import with_config, known_sections +from dlt.common.destination.reference import DestinationClientConfiguration, Destination + +from dlt.destinations.impl.duckdb.configuration import DuckDbCredentials, DuckDbClientConfiguration +from dlt.destinations.impl.duckdb import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.duckdb.duck import DuckDbClient + + +class duckdb(Destination): + + capabilities = capabilities() + spec = DuckDbClientConfiguration + + @property + def client_class(self) -> t.Type["DuckDbClient"]: + from dlt.destinations.impl.duckdb.duck import DuckDbClient + + return DuckDbClient + + @with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, 'duckdb'), accept_partial=True) + def __init__( + self, + credentials: DuckDbCredentials = None, + create_indexes: bool = True, + **kwargs: t.Any, + ) -> None: + super().__init__(kwargs['_dlt_config']) diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index 2e49c8a6f1..13ea3b4ce4 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -1,15 +1,25 @@ import typing as t from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration -from dlt.destinations.impl import filesystem as _filesystem +from dlt.destinations.impl.filesystem import capabilities from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, DestinationFactory +from dlt.common.destination.reference import DestinationClientConfiguration, Destination from dlt.common.storages.configuration import FileSystemCredentials +if t.TYPE_CHECKING: + from dlt.destinations.impl.filesystem.filesystem import FilesystemClient -class filesystem(DestinationFactory): - destination = _filesystem +class filesystem(Destination): + + capabilities = capabilities() + spec = FilesystemDestinationClientConfiguration + + @property + def client_class(self) -> t.Type["FilesystemClient"]: + from dlt.destinations.impl.filesystem.filesystem import FilesystemClient + + return FilesystemClient @with_config(spec=FilesystemDestinationClientConfiguration, sections=(known_sections.DESTINATION, 'filesystem'), accept_partial=True) def __init__( @@ -18,9 +28,4 @@ def __init__( credentials: FileSystemCredentials = None, **kwargs: t.Any, ) -> None: - cfg: FilesystemDestinationClientConfiguration = kwargs['_dlt_config'] - self.credentials = cfg.credentials - self.config_params = { - "credentials": cfg.credentials, - "bucket_url": cfg.bucket_url, - } + 
super().__init__(kwargs['_dlt_config']) diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index eb686a1216..67e9d99090 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -1,15 +1,26 @@ import typing as t from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, DestinationFactory +from dlt.common.destination.reference import DestinationClientConfiguration, Destination from dlt.destinations.impl.postgres.configuration import PostgresCredentials, PostgresClientConfiguration -from dlt.destinations.impl import postgres as _postgres +from dlt.destinations.impl.postgres import capabilities +if t.TYPE_CHECKING: + from dlt.destinations.impl.postgres.postgres import PostgresClient -class postgres(DestinationFactory): - destination = _postgres +class postgres(Destination): + + capabilities = capabilities() + spec = PostgresClientConfiguration + + @property + def client_class(self) -> t.Type["PostgresClient"]: + from dlt.destinations.impl.postgres.postgres import PostgresClient + + return PostgresClient + @with_config(spec=PostgresClientConfiguration, sections=(known_sections.DESTINATION, 'postgres'), accept_partial=True) def __init__( @@ -18,8 +29,4 @@ def __init__( create_indexes: bool = True, **kwargs: t.Any, ) -> None: - cfg: PostgresClientConfiguration = kwargs['_dlt_config'] - self.credentials = cfg.credentials - self.config_params = { - "created_indexes": cfg.create_indexes, - } + super().__init__(kwargs['_dlt_config']) diff --git a/dlt/destinations/impl/snowflake/__init__.py b/dlt/destinations/impl/snowflake/__init__.py index 0cad57b309..8476ceb318 100644 --- a/dlt/destinations/impl/snowflake/__init__.py +++ b/dlt/destinations/impl/snowflake/__init__.py @@ -12,9 +12,9 @@ from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration -@with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, "snowflake",)) -def _configure(config: SnowflakeClientConfiguration = config.value) -> SnowflakeClientConfiguration: - return config +# @with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, "snowflake",)) +# def _configure(config: SnowflakeClientConfiguration = config.value) -> SnowflakeClientConfiguration: +# return config def capabilities() -> DestinationCapabilitiesContext: @@ -37,12 +37,12 @@ def capabilities() -> DestinationCapabilitiesContext: return caps -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient +# def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: +# # import client when creating instance so capabilities and config specs can be accessed without dependencies installed +# from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient - return SnowflakeClient(schema, _configure(initial_config)) # type: ignore +# return SnowflakeClient(schema, _configure(initial_config)) # type: ignore -def spec() -> Type[DestinationClientConfiguration]: - return SnowflakeClientConfiguration +# def spec() -> Type[DestinationClientConfiguration]: +# return SnowflakeClientConfiguration diff --git 
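Every factory defers the heavyweight import: `client_class` is a property whose body performs the runtime import, while the `TYPE_CHECKING` block only serves the annotation. This preserves the guarantee stated in the old module-level comment being retired here, namely that capabilities and config specs stay importable without the destination's driver package installed. The pattern in isolation, with hypothetical names:

    import typing as t

    if t.TYPE_CHECKING:
        # evaluated only by static type checkers, never at runtime
        from mydest.client import MyDestClient  # hypothetical client module

    class mydest:
        @property
        def client_class(self) -> t.Type["MyDestClient"]:
            # the import runs only when a job client is actually needed,
            # so merely importing this factory needs no driver installed
            from mydest.client import MyDestClient
            return MyDestClient
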
a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index c1bc915704..6e53d10701 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -1,14 +1,25 @@ import typing as t from dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials, SnowflakeClientConfiguration -from dlt.destinations.impl import snowflake as _snowflake +from dlt.destinations.impl.snowflake import capabilities from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, DestinationFactory +from dlt.common.destination.reference import DestinationClientConfiguration, Destination +from dlt.common.destination import DestinationCapabilitiesContext +if t.TYPE_CHECKING: + from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient -class snowflake(DestinationFactory): - destination = _snowflake +class snowflake(Destination): + + capabilities = capabilities() + spec = SnowflakeClientConfiguration + + @property + def client_class(self) -> t.Type["SnowflakeClient"]: + from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient + + return SnowflakeClient @with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, 'snowflake'), accept_partial=True) def __init__( @@ -18,9 +29,4 @@ def __init__( keep_staged_files: bool = True, **kwargs: t.Any, ) -> None: - cfg: SnowflakeClientConfiguration = kwargs['_dlt_config'] - self.credentials = cfg.credentials - self.config_params = { - "stage_name": cfg.stage_name, - "keep_staged_files": cfg.keep_staged_files, - } + super().__init__(kwargs['_dlt_config']) diff --git a/dlt/helpers/streamlit_helper.py b/dlt/helpers/streamlit_helper.py index 7921e4e2e1..e43e794bf6 100644 --- a/dlt/helpers/streamlit_helper.py +++ b/dlt/helpers/streamlit_helper.py @@ -120,7 +120,7 @@ def _query_data_live(query: str, schema_name: str = None) -> pd.DataFrame: schema_names = ", ".join(sorted(pipeline.schema_names)) st.markdown(f""" * pipeline name: **{pipeline.pipeline_name}** - * destination: **{str(credentials)}** in **{pipeline.destination.__name__}** + * destination: **{str(credentials)}** in **{pipeline.destination.name}** * dataset name: **{pipeline.dataset_name}** * default schema name: **{pipeline.default_schema_name}** * all schema names: **{schema_names}** diff --git a/dlt/load/load.py b/dlt/load/load.py index d27274ffb1..76fb842927 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -20,7 +20,7 @@ from dlt.common.schema import Schema, TSchemaTables from dlt.common.schema.typing import TTableSchema, TWriteDisposition from dlt.common.storages import LoadStorage -from dlt.common.destination.reference import DestinationClientDwhConfiguration, FollowupJob, JobClientBase, WithStagingDataset, DestinationReference, LoadJob, NewLoadJob, TLoadJobState, DestinationClientConfiguration, SupportsStagingDestination +from dlt.common.destination.reference import DestinationClientDwhConfiguration, FollowupJob, JobClientBase, WithStagingDataset, Destination, LoadJob, NewLoadJob, TLoadJobState, DestinationClientConfiguration, SupportsStagingDestination from dlt.destinations.job_impl import EmptyLoadJob @@ -34,8 +34,8 @@ class Load(Runnable[Executor]): @with_config(spec=LoaderConfiguration, sections=(known_sections.LOAD,)) def __init__( self, - destination: DestinationReference, - staging_destination: DestinationReference = None, + destination: Destination, + staging_destination: Destination = 
None, collector: Collector = NULL_COLLECTOR, is_storage_owner: bool = False, config: LoaderConfiguration = config.value, @@ -47,7 +47,7 @@ def __init__( self.initial_client_config = initial_client_config self.initial_staging_client_config = initial_staging_client_config self.destination = destination - self.capabilities = destination.capabilities() + self.capabilities = destination.capabilities self.staging_destination = staging_destination self.pool = NullExecutor() self.load_storage: LoadStorage = self.create_storage(is_storage_owner) @@ -58,7 +58,7 @@ def __init__( def create_storage(self, is_storage_owner: bool) -> LoadStorage: supported_file_formats = self.capabilities.supported_loader_file_formats if self.staging_destination: - supported_file_formats = self.staging_destination.capabilities().supported_loader_file_formats + ["reference"] + supported_file_formats = self.staging_destination.capabilities.supported_loader_file_formats + ["reference"] if isinstance(self.get_destination_client(Schema("test")), WithStagingDataset): supported_file_formats += ["sql"] load_storage = LoadStorage( @@ -76,7 +76,7 @@ def get_staging_destination_client(self, schema: Schema) -> JobClientBase: return self.staging_destination.client(schema, self.initial_staging_client_config) def is_staging_destination_job(self, file_path: str) -> bool: - return self.staging_destination is not None and os.path.splitext(file_path)[1][1:] in self.staging_destination.capabilities().supported_loader_file_formats + return self.staging_destination is not None and os.path.splitext(file_path)[1][1:] in self.staging_destination.capabilities.supported_loader_file_formats @contextlib.contextmanager def maybe_with_staging_dataset(self, job_client: JobClientBase, use_staging: bool) -> Iterator[None]: diff --git a/dlt/pipeline/__init__.py b/dlt/pipeline/__init__.py index 71c37c40ba..774baa90f0 100644 --- a/dlt/pipeline/__init__.py +++ b/dlt/pipeline/__init__.py @@ -7,7 +7,7 @@ from dlt.common.configuration import with_config from dlt.common.configuration.container import Container from dlt.common.configuration.inject import get_orig_args, last_config -from dlt.common.destination.reference import DestinationReference, TDestinationReferenceArg +from dlt.common.destination.reference import Destination, TDestinationReferenceArg from dlt.common.pipeline import LoadInfo, PipelineContext, get_dlt_pipelines_dir from dlt.pipeline.configuration import PipelineConfiguration, ensure_correct_pipeline_kwargs @@ -116,8 +116,8 @@ def pipeline( if not pipelines_dir: pipelines_dir = get_dlt_pipelines_dir() - destination = DestinationReference.from_name(destination or kwargs["destination_name"]) - staging = DestinationReference.from_name(staging or kwargs.get("staging_name", None)) if staging is not None else None + destination = Destination.from_reference(destination or kwargs["destination_name"], credentials=credentials) + staging = Destination.from_reference(staging or kwargs.get("staging_name", None)) if staging is not None else None progress = collector_from_name(progress) # create new pipeline instance @@ -224,7 +224,7 @@ def run( Returns: LoadInfo: Information on loaded data including the list of package ids and failed job statuses. Please not that `dlt` will not raise if a single job terminally fails. Such information is provided via LoadInfo. 
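With `from_reference` taking extra constructor args, the module-level helpers in dlt/pipeline/__init__.py can forward explicit credentials straight into the factory. Illustrative call patterns (the connection string is a placeholder):

    import dlt

    # bare name: looked up as an attribute of dlt.destinations, with the
    # credentials argument forwarded to the factory's __init__
    p1 = dlt.pipeline(
        "demo",
        destination="postgres",
        credentials="postgresql://user:pass@localhost/dlt_data",
    )

    # dotted path: the module part is imported and the trailing attribute
    # resolves to the same factory class
    p2 = dlt.pipeline("demo2", destination="dlt.destinations.postgres")
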
""" - destination = DestinationReference.from_name(destination) + destination = Destination.from_reference(destination, credentials=credentials) return pipeline().run( data, destination=destination, diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index e6e27afec7..0a4bc78889 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -25,7 +25,7 @@ from dlt.common.runners import pool_runner as runner from dlt.common.storages import LiveSchemaStorage, NormalizeStorage, LoadStorage, SchemaStorage, FileStorage, NormalizeStorageConfiguration, SchemaStorageConfiguration, LoadStorageConfiguration from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import (DestinationClientDwhConfiguration, WithStateSync, DestinationReference, JobClientBase, DestinationClientConfiguration, +from dlt.common.destination.reference import (DestinationClientDwhConfiguration, WithStateSync, Destination, JobClientBase, DestinationClientConfiguration, TDestinationReferenceArg, DestinationClientStagingConfiguration, DestinationClientStagingConfiguration, DestinationClientDwhWithStagingConfiguration) from dlt.common.destination.capabilities import INTERNAL_LOADER_FILE_FORMATS @@ -166,9 +166,9 @@ class Pipeline(SupportsPipeline): """A directory where the pipelines' working directories are created""" working_dir: str """A working directory of the pipeline""" - destination: DestinationReference = None - staging: DestinationReference = None - """The destination reference which is ModuleType. `destination.__name__` returns the name string""" + destination: Destination = None + staging: Destination = None + """The destination reference which is ModuleType. `destination.name` returns the name string""" dataset_name: str = None """Name of the dataset to which pipeline will be loaded to""" credentials: Any = None @@ -183,8 +183,8 @@ def __init__( pipeline_name: str, pipelines_dir: str, pipeline_salt: TSecretValue, - destination: DestinationReference, - staging: DestinationReference, + destination: Destination, + staging: Destination, dataset_name: str, credentials: Any, import_schema_path: str, @@ -718,7 +718,7 @@ def _sql_job_client(self, schema: Schema, credentials: Any = None) -> SqlJobClie if isinstance(client, SqlJobClientBase): return client else: - raise SqlClientNotAvailable(self.pipeline_name, self.destination.__name__) + raise SqlClientNotAvailable(self.pipeline_name, self.destination.name) def _get_normalize_storage(self) -> NormalizeStorage: return NormalizeStorage(True, self._normalize_storage_config) @@ -879,7 +879,7 @@ def _extract_source(self, storage: ExtractorStorage, source: DltSource, max_para return extract_id - def _get_destination_client_initial_config(self, destination: DestinationReference = None, credentials: Any = None, as_staging: bool = False) -> DestinationClientConfiguration: + def _get_destination_client_initial_config(self, destination: Destination = None, credentials: Any = None, as_staging: bool = False) -> DestinationClientConfiguration: destination = destination or self.destination if not destination: raise PipelineConfigMissing( @@ -889,7 +889,7 @@ def _get_destination_client_initial_config(self, destination: DestinationReferen "Please provide `destination` argument to `pipeline`, `run` or `load` method directly or via .dlt config.toml file or environment variable." 
) # create initial destination client config - client_spec = destination.spec() + client_spec = destination.spec # initialize explicit credentials if not as_staging: # explicit credentials passed to dlt.pipeline should not be applied to staging @@ -900,6 +900,7 @@ def _get_destination_client_initial_config(self, destination: DestinationReferen client_spec.get_resolvable_fields()["credentials"], credentials ) + destination.credentials = credentials # this client support many schemas and datasets if issubclass(client_spec, DestinationClientDwhConfiguration): @@ -953,10 +954,10 @@ def _get_destination_capabilities(self) -> DestinationCapabilitiesContext: "normalize", "Please provide `destination` argument to `pipeline`, `run` or `load` method directly or via .dlt config.toml file or environment variable." ) - return self.destination.capabilities() + return self.destination.capabilities def _get_staging_capabilities(self) -> Optional[DestinationCapabilitiesContext]: - return self.staging.capabilities() if self.staging is not None else None + return self.staging.capabilities if self.staging is not None else None def _validate_pipeline_name(self) -> None: try: @@ -985,17 +986,19 @@ def _set_context(self, is_active: bool) -> None: del self._container[DestinationCapabilitiesContext] def _set_destinations(self, destination: TDestinationReferenceArg, staging: TDestinationReferenceArg) -> None: - destination_mod = DestinationReference.from_name(destination) - self.destination = destination_mod or self.destination + # destination_mod = DestinationReference.from_name(destination) + if destination: + self.destination = Destination.from_reference(destination) - if destination and not self.destination.capabilities().supported_loader_file_formats and not staging: - logger.warning(f"The destination {destination_mod.__name__} requires the filesystem staging destination to be set, but it was not provided. Setting it to 'filesystem'.") + if destination and not self.destination.capabilities.supported_loader_file_formats and not staging: + logger.warning(f"The destination {self.destination.name} requires the filesystem staging destination to be set, but it was not provided. 
Setting it to 'filesystem'.") staging = "filesystem" if staging: - staging_module = DestinationReference.from_name(staging) - if staging_module and not issubclass(staging_module.spec(), DestinationClientStagingConfiguration): - raise DestinationNoStagingMode(staging_module.__name__) + # staging_module = DestinationReference.from_name(staging) + staging_module = Destination.from_reference(staging) + if staging_module and not issubclass(staging_module.spec, DestinationClientStagingConfiguration): + raise DestinationNoStagingMode(staging_module.name) self.staging = staging_module or self.staging with self._maybe_destination_capabilities(): @@ -1014,8 +1017,10 @@ def _maybe_destination_capabilities(self, loader_file_format: TLoaderFileFormat caps = injected_caps.__enter__() caps.preferred_loader_file_format = self._resolve_loader_file_format( - DestinationReference.to_name(self.destination), - DestinationReference.to_name(self.staging) if self.staging else None, + self.destination.name, + # DestinationReference.to_name(self.destination), + self.staging.name if self.staging else None, + # DestinationReference.to_name(self.staging) if self.staging else None, destination_caps, stage_caps, loader_file_format) caps.supported_loader_file_formats = ( destination_caps.supported_staging_file_formats if stage_caps else None @@ -1143,12 +1148,12 @@ def _restore_state_from_destination(self) -> Optional[TPipelineState]: if isinstance(job_client, WithStateSync): state = load_state_from_destination(self.pipeline_name, job_client) if state is None: - logger.info(f"The state was not found in the destination {self.destination.__name__}:{dataset_name}") + logger.info(f"The state was not found in the destination {self.destination.name}:{dataset_name}") else: - logger.info(f"The state was restored from the destination {self.destination.__name__}:{dataset_name}") + logger.info(f"The state was restored from the destination {self.destination.name}:{dataset_name}") else: state = None - logger.info(f"Destination does not support metadata storage {self.destination.__name__}:{dataset_name}") + logger.info(f"Destination does not support metadata storage {self.destination.name}:{dataset_name}") return state finally: # restore the use_single_dataset option @@ -1163,17 +1168,17 @@ def _get_schemas_from_destination(self, schema_names: Sequence[str], always_down if not self._schema_storage.has_schema(schema.name) or always_download: with self._get_destination_clients(schema)[0] as job_client: if not isinstance(job_client, WithStateSync): - logger.info(f"Destination does not support metadata storage {self.destination.__name__}") + logger.info(f"Destination does not support metadata storage {self.destination.name}") return restored_schemas schema_info = job_client.get_stored_schema() if schema_info is None: - logger.info(f"The schema {schema.name} was not found in the destination {self.destination.__name__}:{self.dataset_name}") + logger.info(f"The schema {schema.name} was not found in the destination {self.destination.name}:{self.dataset_name}") # try to import schema with contextlib.suppress(FileNotFoundError): self._schema_storage.load_schema(schema.name) else: schema = Schema.from_dict(json.loads(schema_info.schema)) - logger.info(f"The schema {schema.name} version {schema.version} hash {schema.stored_version_hash} was restored from the destination {self.destination.__name__}:{self.dataset_name}") + logger.info(f"The schema {schema.name} version {schema.version} hash {schema.stored_version_hash} was restored from the 
destination {self.destination.name}:{self.dataset_name}") restored_schemas.append(schema) return restored_schemas @@ -1230,7 +1235,7 @@ def _state_to_props(self, state: TPipelineState) -> None: if prop in state["_local"] and not prop.startswith("_"): setattr(self, prop, state["_local"][prop]) # type: ignore if "destination" in state: - self._set_destinations(DestinationReference.from_name(self.destination), DestinationReference.from_name(self.staging) if "staging" in state else None ) + self._set_destinations(self.destination, self.staging if "staging" in state else None ) def _props_to_state(self, state: TPipelineState) -> None: """Write pipeline props to `state`""" @@ -1241,9 +1246,9 @@ def _props_to_state(self, state: TPipelineState) -> None: if not prop.startswith("_"): state["_local"][prop] = getattr(self, prop) # type: ignore if self.destination: - state["destination"] = self.destination.__name__ + state["destination"] = self.destination.name if self.staging: - state["staging"] = self.staging.__name__ + state["staging"] = self.staging.name state["schema_names"] = self._schema_storage.list_schemas() def _save_state(self, state: TPipelineState) -> None: diff --git a/dlt/pipeline/track.py b/dlt/pipeline/track.py index ec42bc788f..07e9a2d137 100644 --- a/dlt/pipeline/track.py +++ b/dlt/pipeline/track.py @@ -9,7 +9,7 @@ from dlt.common.runtime.segment import track as dlthub_telemetry_track from dlt.common.runtime.slack import send_slack_message from dlt.common.pipeline import LoadInfo, ExtractInfo, SupportsPipeline -from dlt.common.destination import DestinationReference +from dlt.common.destination import Destination from dlt.pipeline.typing import TPipelineStep from dlt.pipeline.trace import PipelineTrace, PipelineStepTrace @@ -21,7 +21,7 @@ def _add_sentry_tags(span: Span, pipeline: SupportsPipeline) -> None: span.set_tag("pipeline_name", pipeline.pipeline_name) if pipeline.destination: - span.set_tag("destination", pipeline.destination.__name__) + span.set_tag("destination", pipeline.destination.name) if pipeline.dataset_name: span.set_tag("dataset_name", pipeline.dataset_name) except ImportError: @@ -87,7 +87,7 @@ def on_end_trace_step(trace: PipelineTrace, step: PipelineStepTrace, pipeline: S props = { "elapsed": (step.finished_at - trace.started_at).total_seconds(), "success": step.step_exception is None, - "destination_name": DestinationReference.to_name(pipeline.destination) if pipeline.destination else None, + "destination_name": pipeline.destination.name if pipeline.destination else None, "pipeline_name_hash": digest128(pipeline.pipeline_name), "dataset_name_hash": digest128(pipeline.dataset_name) if pipeline.dataset_name else None, "default_schema_name_hash": digest128(pipeline.default_schema_name) if pipeline.default_schema_name else None, @@ -107,4 +107,4 @@ def on_end_trace(trace: PipelineTrace, pipeline: SupportsPipeline) -> None: if pipeline.runtime_config.sentry_dsn: # print(f"---END SENTRY TX: {trace.transaction_id} SCOPE: {Hub.current.scope}") with contextlib.suppress(Exception): - Hub.current.scope.span.__exit__(None, None, None) \ No newline at end of file + Hub.current.scope.span.__exit__(None, None, None) diff --git a/tests/common/test_destination.py b/tests/common/test_destination.py index 7afa10ed68..00a8480ef4 100644 --- a/tests/common/test_destination.py +++ b/tests/common/test_destination.py @@ -1,6 +1,7 @@ import pytest -from dlt.common.destination.reference import DestinationClientDwhConfiguration, DestinationReference +from dlt.common.destination.reference 
import DestinationClientDwhConfiguration, Destination +from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.exceptions import InvalidDestinationReference, UnknownDestinationModule from dlt.common.schema import Schema from dlt.common.schema.exceptions import InvalidDatasetName @@ -11,24 +12,24 @@ def test_import_unknown_destination() -> None: # standard destination with pytest.raises(UnknownDestinationModule): - DestinationReference.from_name("meltdb") + Destination.from_reference("meltdb") # custom module with pytest.raises(UnknownDestinationModule): - DestinationReference.from_name("melt.db") + Destination.from_reference("melt.db") def test_invalid_destination_reference() -> None: with pytest.raises(InvalidDestinationReference): - DestinationReference.from_name("tests.load.cases.fake_destination") + Destination.from_reference("tests.load.cases.fake_destination") def test_import_all_destinations() -> None: # this must pass without the client dependencies being imported for module in ACTIVE_DESTINATIONS: - dest = DestinationReference.from_name(module) - assert dest.__name__ == "dlt.destinations." + module + dest = Destination.from_reference(module) + assert dest.name == "dlt.destinations." + module dest.spec() - dest.capabilities() + assert isinstance(dest.capabilities, DestinationCapabilitiesContext) def test_normalize_dataset_name() -> None: diff --git a/tests/load/filesystem/utils.py b/tests/load/filesystem/utils.py index a3cc56e0da..6dfd042a4e 100644 --- a/tests/load/filesystem/utils.py +++ b/tests/load/filesystem/utils.py @@ -5,16 +5,16 @@ from dlt.load import Load from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.destination.reference import DestinationReference, LoadJob -from dlt.destinations.impl import filesystem +from dlt.common.destination.reference import Destination, LoadJob +from dlt.destinations import filesystem from dlt.destinations.impl.filesystem.filesystem import FilesystemClient from dlt.destinations.job_impl import EmptyLoadJob from tests.load.utils import prepare_load_package def setup_loader(dataset_name: str) -> Load: - destination: DestinationReference = filesystem # type: ignore[assignment] - config = filesystem.spec()(dataset_name=dataset_name) + destination: Destination = filesystem() + config = filesystem.spec(dataset_name=dataset_name) # setup loader with Container().injectable_context(ConfigSectionContext(sections=('filesystem',))): return Load( diff --git a/tests/load/pipeline/test_arrow_loading.py b/tests/load/pipeline/test_arrow_loading.py index bd709e764d..20cca7602c 100644 --- a/tests/load/pipeline/test_arrow_loading.py +++ b/tests/load/pipeline/test_arrow_loading.py @@ -71,7 +71,7 @@ def some_data(): for row in expected: for i in range(len(row)): if isinstance(row[i], datetime): - row[i] = reduce_pendulum_datetime_precision(row[i], pipeline.destination.capabilities().timestamp_precision) + row[i] = reduce_pendulum_datetime_precision(row[i], pipeline.destination.capabilities.timestamp_precision) load_id = load_info.loads_ids[0] diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 560a683709..78f5ffade6 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -173,7 +173,7 @@ def test_configured_destination(environment) -> None: p = dlt.pipeline() assert p.destination is not None - assert p.destination.__name__.endswith("postgres") + assert 
p.destination.name.endswith("postgres") assert p.pipeline_name == "postgres_pipe" diff --git a/tests/pipeline/test_pipeline_state.py b/tests/pipeline/test_pipeline_state.py index 14b881eedc..0e8dea2145 100644 --- a/tests/pipeline/test_pipeline_state.py +++ b/tests/pipeline/test_pipeline_state.py @@ -48,8 +48,8 @@ def test_restore_state_props() -> None: assert state["destination"].endswith("redshift") assert state["staging"].endswith("filesystem") # also instances are restored - assert p.destination.__name__.endswith("redshift") - assert p.staging.__name__.endswith("filesystem") + assert p.destination.name.endswith("redshift") + assert p.staging.name.endswith("filesystem") def test_managed_state() -> None: From f195fa75be45056774c8fbf78d84c1028d844593 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Sat, 11 Nov 2023 17:02:59 -0500 Subject: [PATCH 04/29] Update factories --- dlt/common/destination/reference.py | 7 ++++- dlt/destinations/__init__.py | 4 +++ dlt/destinations/impl/duckdb/__init__.py | 24 --------------- dlt/destinations/impl/dummy/factory.py | 33 +++++++++++++++++++++ dlt/destinations/impl/mssql/__init__.py | 24 --------------- dlt/destinations/impl/mssql/factory.py | 31 +++++++++++++++++++ dlt/destinations/impl/postgres/__init__.py | 20 ------------- dlt/destinations/impl/snowflake/__init__.py | 23 -------------- tests/load/pipeline/test_pipelines.py | 6 ++-- tests/load/pipeline/utils.py | 2 +- tests/load/test_dummy_client.py | 6 ++-- tests/load/utils.py | 2 +- 12 files changed, 82 insertions(+), 100 deletions(-) create mode 100644 dlt/destinations/impl/dummy/factory.py create mode 100644 dlt/destinations/impl/mssql/factory.py diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index dd7af6e586..018a2e11c0 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -448,7 +448,12 @@ def to_name(ref: TDestinationReferenceArg) -> str: return ref.name @staticmethod - def from_reference(ref: TDestinationReferenceArg, *args, **kwargs) -> "Destination": + def from_reference(ref: TDestinationReferenceArg, *args, **kwargs) -> Optional["Destination"]: + """Instantiate destination from str reference. + The ref can be a destination name or import path pointing to a destination class (e.g. 
`dlt.destinations.postgres`) + """ + if ref is None: + return None if isinstance(ref, Destination): return ref if not isinstance(ref, str): diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index 0abba830ab..04bc43bc1a 100644 --- a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -2,6 +2,8 @@ from dlt.destinations.impl.snowflake.factory import snowflake from dlt.destinations.impl.filesystem.factory import filesystem from dlt.destinations.impl.duckdb.factory import duckdb +from dlt.destinations.impl.dummy.factory import dummy +from dlt.destinations.impl.mssql.factory import mssql __all__ = [ @@ -9,4 +11,6 @@ "snowflake", "filesystem", "duckdb", + "dummy", + "mssql", ] diff --git a/dlt/destinations/impl/duckdb/__init__.py b/dlt/destinations/impl/duckdb/__init__.py index b2a57d0788..5cbc8dea53 100644 --- a/dlt/destinations/impl/duckdb/__init__.py +++ b/dlt/destinations/impl/duckdb/__init__.py @@ -1,20 +1,7 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.duckdb.configuration import DuckDbClientConfiguration - - -@with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, "duckdb",)) -def _configure(config: DuckDbClientConfiguration = config.value) -> DuckDbClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -37,14 +24,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_truncate_command = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.duckdb.duck import DuckDbClient - - return DuckDbClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return DuckDbClientConfiguration diff --git a/dlt/destinations/impl/dummy/factory.py b/dlt/destinations/impl/dummy/factory.py new file mode 100644 index 0000000000..90136385e1 --- /dev/null +++ b/dlt/destinations/impl/dummy/factory.py @@ -0,0 +1,33 @@ +import typing as t + +from dlt.common.configuration import with_config, known_sections +from dlt.common.destination.reference import DestinationClientConfiguration, Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration, DummyClientCredentials +from dlt.destinations.impl.dummy import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.dummy.dummy import DummyClient + + +class dummy(Destination): + + spec = DummyClientConfiguration + + @property + def capabilitites(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["DummyClient"]: + from dlt.destinations.impl.dummy.dummy import DummyClient + + return DummyClient + + @with_config(spec=DummyClientConfiguration, sections=(known_sections.DESTINATION, 'dummy'), 
accept_partial=True) + def __init__( + self, + credentials: DummyClientCredentials = None, + **kwargs: t.Any, + ) -> None: + super().__init__(kwargs['_dlt_config']) diff --git a/dlt/destinations/impl/mssql/__init__.py b/dlt/destinations/impl/mssql/__init__.py index 8f9f92d4eb..40e971cacf 100644 --- a/dlt/destinations/impl/mssql/__init__.py +++ b/dlt/destinations/impl/mssql/__init__.py @@ -1,21 +1,8 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_mssql_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.wei import EVM_DECIMAL_PRECISION -from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration - - -@with_config(spec=MsSqlClientConfiguration, sections=(known_sections.DESTINATION, "mssql",)) -def _configure(config: MsSqlClientConfiguration = config.value) -> MsSqlClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -39,14 +26,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.timestamp_precision = 7 return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.mssql.mssql import MsSqlClient - - return MsSqlClient(schema, _configure(initial_config)) # type: ignore[arg-type] - - -def spec() -> Type[DestinationClientConfiguration]: - return MsSqlClientConfiguration diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py new file mode 100644 index 0000000000..ee49978fb6 --- /dev/null +++ b/dlt/destinations/impl/mssql/factory.py @@ -0,0 +1,31 @@ +import typing as t + +from dlt.common.configuration import with_config, known_sections +from dlt.common.destination.reference import DestinationClientConfiguration, Destination + +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration +from dlt.destinations.impl.mssql import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.mssql.mssql import MsSqlClient + + +class mssql(Destination): + + capabilities = capabilities() + spec = MsSqlClientConfiguration + + @property + def client_class(self) -> t.Type["MsSqlClient"]: + from dlt.destinations.impl.mssql.mssql import MsSqlClient + + return MsSqlClient + + @with_config(spec=MsSqlClientConfiguration, sections=(known_sections.DESTINATION, 'mssql'), accept_partial=True) + def __init__( + self, + credentials: MsSqlCredentials = None, + create_indexes: bool = True, + **kwargs: t.Any, + ) -> None: + super().__init__(kwargs['_dlt_config']) diff --git a/dlt/destinations/impl/postgres/__init__.py b/dlt/destinations/impl/postgres/__init__.py index 54bc3297b1..009174ecc9 100644 --- a/dlt/destinations/impl/postgres/__init__.py +++ b/dlt/destinations/impl/postgres/__init__.py @@ -1,20 +1,9 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import 
config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.wei import EVM_DECIMAL_PRECISION -from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration - - -@with_config(spec=PostgresClientConfiguration, sections=(known_sections.DESTINATION, "postgres",)) -def _configure(config: PostgresClientConfiguration = config.value) -> PostgresClientConfiguration: - return config def capabilities() -> DestinationCapabilitiesContext: @@ -39,12 +28,3 @@ def capabilities() -> DestinationCapabilitiesContext: return caps -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.postgres.postgres import PostgresClient - - return PostgresClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return PostgresClientConfiguration diff --git a/dlt/destinations/impl/snowflake/__init__.py b/dlt/destinations/impl/snowflake/__init__.py index 8476ceb318..12e118eeab 100644 --- a/dlt/destinations/impl/snowflake/__init__.py +++ b/dlt/destinations/impl/snowflake/__init__.py @@ -1,20 +1,8 @@ -from typing import Type from dlt.common.data_writers.escape import escape_bigquery_identifier - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.data_writers.escape import escape_snowflake_identifier from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.snowflake.configuration import SnowflakeClientConfiguration - - -# @with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, "snowflake",)) -# def _configure(config: SnowflakeClientConfiguration = config.value) -> SnowflakeClientConfiguration: -# return config def capabilities() -> DestinationCapabilitiesContext: @@ -35,14 +23,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = True caps.alter_add_multi_column = True return caps - - -# def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: -# # import client when creating instance so capabilities and config specs can be accessed without dependencies installed -# from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient - -# return SnowflakeClient(schema, _configure(initial_config)) # type: ignore - - -# def spec() -> Type[DestinationClientConfiguration]: -# return SnowflakeClientConfiguration diff --git a/tests/load/pipeline/test_pipelines.py b/tests/load/pipeline/test_pipelines.py index 99071a7ac6..2fc4aad1a8 100644 --- a/tests/load/pipeline/test_pipelines.py +++ b/tests/load/pipeline/test_pipelines.py @@ -8,7 +8,7 @@ from dlt.common.pipeline import SupportsPipeline from dlt.common import json, sleep -from dlt.common.destination.reference import DestinationReference +from 
dlt.common.destination import Destination from dlt.common.schema.schema import Schema from dlt.common.schema.typing import VERSION_TABLE_NAME from dlt.common.typing import TDataItem @@ -66,8 +66,8 @@ def data_fun() -> Iterator[Any]: # mock the correct destinations (never do that in normal code) with p.managed_state(): p._set_destinations( - DestinationReference.from_name(destination_config.destination), - DestinationReference.from_name(destination_config.staging) if destination_config.staging else None + Destination.from_reference(destination_config.destination), + Destination.from_reference(destination_config.staging) if destination_config.staging else None ) # does not reset the dataset name assert p.dataset_name in possible_dataset_names diff --git a/tests/load/pipeline/utils.py b/tests/load/pipeline/utils.py index 7ed71fe27a..113585f669 100644 --- a/tests/load/pipeline/utils.py +++ b/tests/load/pipeline/utils.py @@ -67,7 +67,7 @@ def _drop_dataset(schema_name: str) -> None: def _is_filesystem(p: dlt.Pipeline) -> bool: if not p.destination: return False - return p.destination.__name__.rsplit('.', 1)[-1] == 'filesystem' + return p.destination.name == 'filesystem' def assert_table(p: dlt.Pipeline, table_name: str, table_data: List[Any], schema_name: str = None, info: LoadInfo = None) -> None: diff --git a/tests/load/test_dummy_client.py b/tests/load/test_dummy_client.py index bb4b76c0b7..a959f6d960 100644 --- a/tests/load/test_dummy_client.py +++ b/tests/load/test_dummy_client.py @@ -11,12 +11,12 @@ from dlt.common.storages import FileStorage, LoadStorage from dlt.common.storages.load_storage import JobWithUnsupportedWriterException from dlt.common.utils import uniq_id -from dlt.common.destination.reference import DestinationReference, LoadJob +from dlt.common.destination.reference import Destination, LoadJob from dlt.load import Load from dlt.destinations.job_impl import EmptyLoadJob -from dlt.destinations.impl import dummy +from dlt.destinations import dummy from dlt.destinations.impl.dummy import dummy as dummy_impl from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration from dlt.load.exceptions import LoadClientJobFailed, LoadClientJobRetry @@ -445,7 +445,7 @@ def run_all(load: Load) -> None: def setup_loader(delete_completed_jobs: bool = False, client_config: DummyClientConfiguration = None) -> Load: # reset jobs for a test dummy_impl.JOBS = {} - destination: DestinationReference = dummy # type: ignore[assignment] + destination: Destination = dummy() client_config = client_config or DummyClientConfiguration(loader_file_format="jsonl") # patch destination to provide client_config # destination.client = lambda schema: dummy_impl.DummyClient(schema, client_config) diff --git a/tests/load/utils.py b/tests/load/utils.py index f8680b3885..4b1cfc2f1a 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -12,7 +12,7 @@ from dlt.common.configuration import resolve_configuration from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.destination.reference import DestinationClientDwhConfiguration, DestinationReference, JobClientBase, LoadJob, DestinationClientStagingConfiguration, WithStagingDataset, TDestinationReferenceArg +from dlt.common.destination.reference import DestinationClientDwhConfiguration, JobClientBase, LoadJob, DestinationClientStagingConfiguration, WithStagingDataset, TDestinationReferenceArg from dlt.common.destination import 
TLoaderFileFormat from dlt.common.data_writers import DataWriter from dlt.common.schema import TColumnSchema, TTableSchemaColumns, Schema From 340f63721ef2df33ffd4fa6d702032b8d087c921 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Sun, 12 Nov 2023 16:02:57 -0500 Subject: [PATCH 05/29] Defer duckdb credentials resolving in pipeline context --- dlt/common/destination/reference.py | 20 ++++++------- dlt/destinations/impl/duckdb/configuration.py | 30 ++++++++++--------- dlt/destinations/impl/dummy/factory.py | 2 +- dlt/pipeline/pipeline.py | 3 ++ tests/common/test_destination.py | 8 ++--- tests/load/duckdb/test_duckdb_client.py | 23 +++++++------- tests/load/utils.py | 8 ++--- 7 files changed, 49 insertions(+), 45 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 018a2e11c0..f232622c52 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -441,14 +441,14 @@ def client_class(self) -> Type[JobClientBase]: @staticmethod def to_name(ref: TDestinationReferenceArg) -> str: - if not ref: + if ref is None: raise InvalidDestinationReference(ref) if isinstance(ref, str): return ref.rsplit(".", 1)[-1] return ref.name @staticmethod - def from_reference(ref: TDestinationReferenceArg, *args, **kwargs) -> Optional["Destination"]: + def from_reference(ref: TDestinationReferenceArg, credentials: Optional[CredentialsConfiguration] = None, **kwargs: Any) -> Optional["Destination"]: """Instantiate destination from str reference. The ref can be a destination name or import path pointing to a destination class (e.g. `dlt.destinations.postgres`) """ @@ -463,24 +463,22 @@ def from_reference(ref: TDestinationReferenceArg, *args, **kwargs) -> Optional[" module_path, attr_name = ref.rsplit(".", 1) dest_module = import_module(module_path) else: - from dlt import destinations as dest_module + from dlt import destinations as dest_module attr_name = ref - except ImportError as e: + except ModuleNotFoundError as e: raise UnknownDestinationModule(ref) from e try: factory: Type[Destination] = getattr(dest_module, attr_name) except AttributeError as e: - raise UnknownDestinationModule(ref) from e - return factory(*args, **kwargs) - + raise InvalidDestinationReference(ref) from e + if credentials: + kwargs["credentials"] = credentials + return factory(**kwargs) def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": - # TODO: Raise error somewhere if both DestinationFactory and credentials argument are used together in pipeline - cfg = initial_config.copy() + cfg = initial_config cfg.update(self.config_params) - # for key, value in self.config_params.items(): - # setattr(cfg, key, value) if self.credentials: cfg.credentials = self.credentials return self.client_class(schema, cfg) diff --git a/dlt/destinations/impl/duckdb/configuration.py b/dlt/destinations/impl/duckdb/configuration.py index 82ee325ed3..556a7c9829 100644 --- a/dlt/destinations/impl/duckdb/configuration.py +++ b/dlt/destinations/impl/duckdb/configuration.py @@ -25,6 +25,7 @@ class DuckDbBaseCredentials(ConnectionStringCredentials): read_only: bool = False # open database read/write def borrow_conn(self, read_only: bool) -> Any: + # TODO: Can this be done in sql client instead? 
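+        # duckdb is imported lazily here so the duckdb package is only required once a connection is actually opened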
import duckdb if not hasattr(self, "_conn_lock"): @@ -95,14 +96,10 @@ class DuckDbCredentials(DuckDbBaseCredentials): __config_gen_annotations__: ClassVar[List[str]] = [] - def on_resolved(self) -> None: - # do not set any paths for external database - if self.database == ":external:": - return - # try the pipeline context + def _database_path(self) -> str: is_default_path = False if self.database == ":pipeline:": - self.database = self._path_in_pipeline(DEFAULT_DUCK_DB_NAME) + return self._path_in_pipeline(DEFAULT_DUCK_DB_NAME) else: # maybe get database maybe_database, maybe_is_default_path = self._path_from_pipeline(DEFAULT_DUCK_DB_NAME) @@ -110,13 +107,14 @@ def on_resolved(self) -> None: if not self.database or not maybe_is_default_path: # create database locally is_default_path = maybe_is_default_path - self.database = maybe_database + path = maybe_database + else: + path = self.database - # always make database an abs path - self.database = os.path.abspath(self.database) - # do not save the default path into pipeline's local state + path = os.path.abspath(path) if not is_default_path: - self._path_to_pipeline(self.database) + self._path_to_pipeline(path) + return path def _path_in_pipeline(self, rel_path: str) -> str: from dlt.common.configuration.container import Container @@ -125,9 +123,10 @@ def _path_in_pipeline(self, rel_path: str) -> str: context = Container()[PipelineContext] if context.is_active(): # pipeline is active, get the working directory - return os.path.join(context.pipeline().working_dir, rel_path) - return None - + abs_path = os.path.abspath(os.path.join(context.pipeline().working_dir, rel_path)) + context.pipeline().set_local_state_val(LOCAL_STATE_KEY, abs_path) + return abs_path + raise RuntimeError("Attempting to use special duckdb database :pipeline: outside of pipeline context.") def _path_to_pipeline(self, abspath: str) -> None: from dlt.common.configuration.container import Container @@ -173,6 +172,9 @@ def _path_from_pipeline(self, default_path: str) -> Tuple[str, bool]: return default_path, True + def _conn_str(self) -> str: + return self._database_path() + @configspec class DuckDbClientConfiguration(DestinationClientDwhWithStagingConfiguration): diff --git a/dlt/destinations/impl/dummy/factory.py b/dlt/destinations/impl/dummy/factory.py index 90136385e1..7a79ddd0a1 100644 --- a/dlt/destinations/impl/dummy/factory.py +++ b/dlt/destinations/impl/dummy/factory.py @@ -15,7 +15,7 @@ class dummy(Destination): spec = DummyClientConfiguration @property - def capabilitites(self) -> DestinationCapabilitiesContext: + def capabilities(self) -> DestinationCapabilitiesContext: return capabilities() @property diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 0a4bc78889..25910235a2 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -900,6 +900,9 @@ def _get_destination_client_initial_config(self, destination: Destination = None client_spec.get_resolvable_fields()["credentials"], credentials ) + + if credentials and not as_staging: + # Explicit pipeline credentials always supersede other credentials destination.credentials = credentials # this client support many schemas and datasets diff --git a/tests/common/test_destination.py b/tests/common/test_destination.py index 00a8480ef4..b1c85bb91f 100644 --- a/tests/common/test_destination.py +++ b/tests/common/test_destination.py @@ -11,7 +11,7 @@ def test_import_unknown_destination() -> None: # standard destination - with pytest.raises(UnknownDestinationModule): + with 
pytest.raises(InvalidDestinationReference): Destination.from_reference("meltdb") # custom module with pytest.raises(UnknownDestinationModule): @@ -25,9 +25,9 @@ def test_invalid_destination_reference() -> None: def test_import_all_destinations() -> None: # this must pass without the client dependencies being imported - for module in ACTIVE_DESTINATIONS: - dest = Destination.from_reference(module) - assert dest.name == "dlt.destinations." + module + for dest_name in ACTIVE_DESTINATIONS: + dest = Destination.from_reference(dest_name) + assert dest.name == dest_name dest.spec() assert isinstance(dest.capabilities, DestinationCapabilitiesContext) diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index 9d3faa3881..ace46ebd5e 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -7,6 +7,7 @@ from dlt.common.configuration.utils import get_resolved_traces from dlt.destinations.impl.duckdb.configuration import DUCK_DB_NAME, DuckDbClientConfiguration, DuckDbCredentials, DEFAULT_DUCK_DB_NAME +from dlt.destinations import duckdb from tests.load.pipeline.utils import drop_pipeline, assert_table from tests.utils import patch_home_dir, autouse_test_storage, preserve_environ, TEST_STORAGE_ROOT @@ -46,13 +47,13 @@ def test_duckdb_open_conn_default() -> None: def test_duckdb_database_path() -> None: # resolve without any path provided c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) - assert c.credentials.database.lower() == os.path.abspath("quack.duckdb").lower() + assert c.credentials._database_path().lower() == os.path.abspath("quack.duckdb").lower() # resolve without any path but with pipeline context p = dlt.pipeline(pipeline_name="quack_pipeline") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) # still cwd db_path = os.path.abspath(os.path.join(".", "quack_pipeline.duckdb")) - assert c.credentials.database.lower() == db_path.lower() + assert c.credentials._database_path().lower() == db_path.lower() # we do not keep default duckdb path in the local state with pytest.raises(KeyError): p.get_local_state_val("duckdb_database") @@ -69,7 +70,7 @@ def test_duckdb_database_path() -> None: # test special :pipeline: path to create in pipeline folder c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=":pipeline:")) db_path = os.path.abspath(os.path.join(p.working_dir, DEFAULT_DUCK_DB_NAME)) - assert c.credentials.database.lower() == db_path.lower() + assert c.credentials._database_path().lower() == db_path.lower() # connect conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) @@ -80,7 +81,7 @@ def test_duckdb_database_path() -> None: # provide relative path db_path = "_storage/test_quack.duckdb" c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials="duckdb:///_storage/test_quack.duckdb")) - assert c.credentials.database.lower() == os.path.abspath(db_path).lower() + assert c.credentials._database_path().lower() == os.path.abspath(db_path).lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -90,7 +91,7 @@ def test_duckdb_database_path() -> None: db_path = os.path.abspath("_storage/abs_test_quack.duckdb") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=f"duckdb:///{db_path}")) assert os.path.isabs(c.credentials.database) - assert 
c.credentials.database.lower() == db_path.lower() + assert c.credentials._database_path().lower() == db_path.lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -99,7 +100,7 @@ def test_duckdb_database_path() -> None: # set just path as credentials db_path = "_storage/path_test_quack.duckdb" c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path)) - assert c.credentials.database.lower() == os.path.abspath(db_path).lower() + assert c.credentials._database_path().lower() == os.path.abspath(db_path).lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -108,7 +109,7 @@ def test_duckdb_database_path() -> None: db_path = os.path.abspath("_storage/abs_path_test_quack.duckdb") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path)) assert os.path.isabs(c.credentials.database) - assert c.credentials.database.lower() == db_path.lower() + assert c.credentials._database_path().lower() == db_path.lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -128,7 +129,7 @@ def test_keeps_initial_db_path() -> None: print(p.pipelines_dir) with p.sql_client() as conn: # still cwd - assert conn.credentials.database.lower() == os.path.abspath(db_path).lower() + assert conn.credentials._database_path().lower() == os.path.abspath(db_path).lower() # but it is kept in the local state assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() @@ -138,7 +139,7 @@ def test_keeps_initial_db_path() -> None: with p.sql_client() as conn: # still cwd assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() - assert conn.credentials.database.lower() == os.path.abspath(db_path).lower() + assert conn.credentials._database_path().lower() == os.path.abspath(db_path).lower() # now create a new pipeline dlt.pipeline(pipeline_name="not_quack", destination="dummy") @@ -147,12 +148,12 @@ def test_keeps_initial_db_path() -> None: assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() # new pipeline context took over # TODO: restore pipeline context on each call - assert conn.credentials.database.lower() != os.path.abspath(db_path).lower() + assert conn.credentials._database_path().lower() != os.path.abspath(db_path).lower() def test_duckdb_database_delete() -> None: db_path = "_storage/path_test_quack.duckdb" - p = dlt.pipeline(pipeline_name="quack_pipeline", credentials=db_path, destination="duckdb") + p = dlt.pipeline(pipeline_name="quack_pipeline", destination=duckdb(credentials=DuckDbCredentials(db_path))) p.run([1, 2, 3], table_name="table", dataset_name="dataset") # attach the pipeline p = dlt.attach(pipeline_name="quack_pipeline") diff --git a/tests/load/utils.py b/tests/load/utils.py index 4b1cfc2f1a..098c5a5509 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -13,7 +13,7 @@ from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.destination.reference import DestinationClientDwhConfiguration, JobClientBase, LoadJob, DestinationClientStagingConfiguration, WithStagingDataset, TDestinationReferenceArg -from dlt.common.destination import TLoaderFileFormat +from dlt.common.destination import TLoaderFileFormat, 
Destination from dlt.common.data_writers import DataWriter from dlt.common.schema import TColumnSchema, TTableSchemaColumns, Schema from dlt.common.storages import SchemaStorage, FileStorage, SchemaStorageConfiguration @@ -229,10 +229,10 @@ def yield_client( ) -> Iterator[SqlJobClientBase]: os.environ.pop("DATASET_NAME", None) # import destination reference by name - destination = import_module(f"dlt.destinations.impl.{destination_name}") + destination = Destination.from_reference(destination_name) # create initial config dest_config: DestinationClientDwhConfiguration = None - dest_config = destination.spec()() + dest_config = destination.spec() # type: ignore[assignment] dest_config.dataset_name = dataset_name # type: ignore[misc] # TODO: Why is dataset_name final? if default_config_values is not None: @@ -261,7 +261,7 @@ def yield_client( # lookup for credentials in the section that is destination name with Container().injectable_context(ConfigSectionContext(sections=("destination", destination_name,))): - with destination.client(schema, dest_config) as client: + with destination.client(schema, dest_config) as client: # type: ignore[assignment] yield client @contextlib.contextmanager From 4ae4ceb2134485cee8dd3cdaa4f9435a647eca23 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Mon, 13 Nov 2023 19:42:10 -0500 Subject: [PATCH 06/29] Simplify destination config resolution --- dlt/common/configuration/inject.py | 5 ++- dlt/common/destination/reference.py | 38 +++++++++++++++------ dlt/destinations/impl/duckdb/factory.py | 10 +++--- dlt/destinations/impl/dummy/factory.py | 3 +- dlt/destinations/impl/filesystem/factory.py | 3 +- dlt/destinations/impl/mssql/factory.py | 5 ++- dlt/destinations/impl/postgres/factory.py | 6 ++-- dlt/destinations/impl/snowflake/factory.py | 3 +- dlt/pipeline/pipeline.py | 4 --- 9 files changed, 43 insertions(+), 34 deletions(-) diff --git a/dlt/common/configuration/inject.py b/dlt/common/configuration/inject.py index 4e214695f2..f50e947011 100644 --- a/dlt/common/configuration/inject.py +++ b/dlt/common/configuration/inject.py @@ -61,6 +61,7 @@ def with_config( auto_pipeline_section: bool = False, include_defaults: bool = True, accept_partial: bool = False, + initial_config: Optional[BaseConfiguration] = None, ) -> Callable[[TFun], TFun]: """Injects values into decorated function arguments following the specification in `spec` or by deriving one from function's signature. 
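+        initial_config: optional BaseConfiguration instance used as the initial value for the resolved spec; when provided it takes precedence over a spec instance found in the call arguments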
@@ -130,7 +131,9 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: curr_sections = sections # if one of arguments is spec the use it as initial value - if spec_arg: + if initial_config: + config = initial_config + elif spec_arg: config = bound_args.arguments.get(spec_arg.name, None) # resolve SPEC, also provide section_context with pipeline_name if pipeline_name_arg: diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index f232622c52..b7f7e0b23f 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod, abstractproperty from importlib import import_module from types import TracebackType, ModuleType -from typing import ClassVar, Final, Optional, NamedTuple, Literal, Sequence, Iterable, Type, Protocol, Union, TYPE_CHECKING, cast, List, ContextManager, Dict, Any, Callable +from typing import ClassVar, Final, Optional, NamedTuple, Literal, Sequence, Iterable, Type, Protocol, Union, TYPE_CHECKING, cast, List, ContextManager, Dict, Any, Callable, TypeVar from contextlib import contextmanager import datetime # noqa: 251 from copy import deepcopy @@ -12,7 +12,7 @@ from dlt.common.schema.typing import TWriteDisposition from dlt.common.schema.exceptions import InvalidDatasetName from dlt.common.schema.utils import get_write_disposition, get_table_format -from dlt.common.configuration import configspec +from dlt.common.configuration import configspec, with_config, resolve_configuration, known_sections from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration from dlt.common.configuration.accessors import config from dlt.common.destination.capabilities import DestinationCapabilitiesContext @@ -25,6 +25,7 @@ TLoaderReplaceStrategy = Literal["truncate-and-insert", "insert-from-staging", "staging-optimized"] +TDestinationConfig = TypeVar("TDestinationConfig", bound="DestinationClientConfiguration") class StorageSchemaInfo(NamedTuple): @@ -411,11 +412,13 @@ class Destination(ABC): """ credentials: Optional[CredentialsConfiguration] = None config_params: Optional[Dict[str, Any]] = None + initial_config: DestinationClientConfiguration - def __init__(self, cfg: DestinationClientConfiguration) -> None: - cfg_dict = dict(cfg) - self.credentials = cfg_dict.pop("credentials", None) - self.config_params = {key: val for key, val in cfg_dict.items() if val is not None} + def __init__(self, **kwargs: Any) -> None: + self.initial_config = self.spec(**kwargs) + # cfg_dict = dict(cfg) + # self.credentials = cfg_dict.pop("credentials", None) + # self.config_params = {key: val for key, val in cfg_dict.items() if val is not None} @property @abstractmethod @@ -439,6 +442,16 @@ def client_class(self) -> Type[JobClientBase]: """Returns the client class""" ... 
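+    # a factory instance carries a partial initial config, e.g. duckdb(credentials="quack.duckdb") pre-sets
+    # the credentials; configuration() merges that partial config with values resolved from config providers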
+ def configuration(self, initial_config: TDestinationConfig) -> TDestinationConfig: + """Get a fully resolved destination config from the initial config + """ + return resolve_configuration( + initial_config, + sections=(known_sections.DESTINATION, self.name), + # Already populated values will supersede resolved env config + explicit_value=dict(initial_config) + ) + @staticmethod def to_name(ref: TDestinationReferenceArg) -> str: if ref is None: @@ -477,8 +490,11 @@ def from_reference(ref: TDestinationReferenceArg, credentials: Optional[Credenti return factory(**kwargs) def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": - cfg = initial_config - cfg.update(self.config_params) - if self.credentials: - cfg.credentials = self.credentials - return self.client_class(schema, cfg) + # Create merged config with the pipeline initial cfg and the partial config of this instance + cfg = self.spec( + **dict( + initial_config, + **{k: v for k, v in self.initial_config.items() if v is not None} + ) + ) + return self.client_class(schema, self.configuration(cfg)) diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py index 656e866c54..f8c2d83205 100644 --- a/dlt/destinations/impl/duckdb/factory.py +++ b/dlt/destinations/impl/duckdb/factory.py @@ -2,11 +2,11 @@ from dlt.common.configuration import with_config, known_sections from dlt.common.destination.reference import DestinationClientConfiguration, Destination - from dlt.destinations.impl.duckdb.configuration import DuckDbCredentials, DuckDbClientConfiguration from dlt.destinations.impl.duckdb import capabilities if t.TYPE_CHECKING: + from duckdb import DuckDBPyConnection from dlt.destinations.impl.duckdb.duck import DuckDbClient @@ -21,11 +21,11 @@ def client_class(self) -> t.Type["DuckDbClient"]: return DuckDbClient - @with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, 'duckdb'), accept_partial=True) + # @with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, 'duckdb'), accept_partial=True) def __init__( self, - credentials: DuckDbCredentials = None, - create_indexes: bool = True, + credentials: t.Union[DuckDbCredentials, str, "DuckDBPyConnection"] = None, + create_indexes: bool = False, **kwargs: t.Any, ) -> None: - super().__init__(kwargs['_dlt_config']) + super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs) diff --git a/dlt/destinations/impl/dummy/factory.py b/dlt/destinations/impl/dummy/factory.py index 7a79ddd0a1..1c55990b45 100644 --- a/dlt/destinations/impl/dummy/factory.py +++ b/dlt/destinations/impl/dummy/factory.py @@ -24,10 +24,9 @@ def client_class(self) -> t.Type["DummyClient"]: return DummyClient - @with_config(spec=DummyClientConfiguration, sections=(known_sections.DESTINATION, 'dummy'), accept_partial=True) def __init__( self, credentials: DummyClientCredentials = None, **kwargs: t.Any, ) -> None: - super().__init__(kwargs['_dlt_config']) + super().__init__(credentials=credentials, **kwargs) diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index 13ea3b4ce4..150bf0fdd8 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -21,11 +21,10 @@ def client_class(self) -> t.Type["FilesystemClient"]: return FilesystemClient - @with_config(spec=FilesystemDestinationClientConfiguration, sections=(known_sections.DESTINATION, 'filesystem'), 
accept_partial=True) def __init__( self, bucket_url: str = None, credentials: FileSystemCredentials = None, **kwargs: t.Any, ) -> None: - super().__init__(kwargs['_dlt_config']) + super().__init__(bucket_url=bucket_url, credentials=credentials, **kwargs) diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py index ee49978fb6..f24f8a646d 100644 --- a/dlt/destinations/impl/mssql/factory.py +++ b/dlt/destinations/impl/mssql/factory.py @@ -21,11 +21,10 @@ def client_class(self) -> t.Type["MsSqlClient"]: return MsSqlClient - @with_config(spec=MsSqlClientConfiguration, sections=(known_sections.DESTINATION, 'mssql'), accept_partial=True) def __init__( self, - credentials: MsSqlCredentials = None, + credentials: t.Union[MsSqlCredentials, str] = None, create_indexes: bool = True, **kwargs: t.Any, ) -> None: - super().__init__(kwargs['_dlt_config']) + super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs) diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index 67e9d99090..d9606ac81d 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -21,12 +21,10 @@ def client_class(self) -> t.Type["PostgresClient"]: return PostgresClient - - @with_config(spec=PostgresClientConfiguration, sections=(known_sections.DESTINATION, 'postgres'), accept_partial=True) def __init__( self, - credentials: PostgresCredentials = None, + credentials: t.Union[PostgresCredentials, str] = None, create_indexes: bool = True, **kwargs: t.Any, ) -> None: - super().__init__(kwargs['_dlt_config']) + super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs) diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index 6e53d10701..d737aed586 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -21,7 +21,6 @@ def client_class(self) -> t.Type["SnowflakeClient"]: return SnowflakeClient - @with_config(spec=SnowflakeClientConfiguration, sections=(known_sections.DESTINATION, 'snowflake'), accept_partial=True) def __init__( self, credentials: SnowflakeCredentials = None, @@ -29,4 +28,4 @@ def __init__( keep_staged_files: bool = True, **kwargs: t.Any, ) -> None: - super().__init__(kwargs['_dlt_config']) + super().__init__(credentials=credentials, stage_name=stage_name, keep_staged_files=keep_staged_files, **kwargs) diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 25910235a2..003f13fe3d 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -901,10 +901,6 @@ def _get_destination_client_initial_config(self, destination: Destination = None credentials ) - if credentials and not as_staging: - # Explicit pipeline credentials always supersede other credentials - destination.credentials = credentials - # this client support many schemas and datasets if issubclass(client_spec, DestinationClientDwhConfiguration): if not self.dataset_name and self.full_refresh: From bccfa9d306bc138e62901d239a79b1623b0420cb Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Mon, 13 Nov 2023 20:21:41 -0500 Subject: [PATCH 07/29] capabilities are callable --- dlt/common/destination/reference.py | 5 ----- dlt/destinations/impl/duckdb/factory.py | 7 ++++--- dlt/destinations/impl/dummy/factory.py | 4 +--- dlt/destinations/impl/filesystem/factory.py | 7 ++++--- dlt/destinations/impl/mssql/factory.py | 7 ++++--- 
dlt/destinations/impl/postgres/factory.py | 7 ++++--- dlt/destinations/impl/snowflake/factory.py | 8 ++++---- dlt/load/load.py | 10 +++++----- dlt/pipeline/pipeline.py | 6 +++--- tests/load/pipeline/test_arrow_loading.py | 2 +- 10 files changed, 30 insertions(+), 33 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index b7f7e0b23f..8c19aac35b 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -410,15 +410,11 @@ class Destination(ABC): """A destination factory that can be partially pre-configured with credentials and other config params. """ - credentials: Optional[CredentialsConfiguration] = None config_params: Optional[Dict[str, Any]] = None initial_config: DestinationClientConfiguration def __init__(self, **kwargs: Any) -> None: self.initial_config = self.spec(**kwargs) - # cfg_dict = dict(cfg) - # self.credentials = cfg_dict.pop("credentials", None) - # self.config_params = {key: val for key, val in cfg_dict.items() if val is not None} @property @abstractmethod @@ -426,7 +422,6 @@ def spec(self) -> Type[DestinationClientConfiguration]: """Returns the destination configuration spec""" ... - @property @abstractmethod def capabilities(self) -> DestinationCapabilitiesContext: """Returns the destination capabilities""" diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py index f8c2d83205..22e3c9e819 100644 --- a/dlt/destinations/impl/duckdb/factory.py +++ b/dlt/destinations/impl/duckdb/factory.py @@ -1,7 +1,6 @@ import typing as t -from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, Destination +from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.destinations.impl.duckdb.configuration import DuckDbCredentials, DuckDbClientConfiguration from dlt.destinations.impl.duckdb import capabilities @@ -12,9 +11,11 @@ class duckdb(Destination): - capabilities = capabilities() spec = DuckDbClientConfiguration + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + @property def client_class(self) -> t.Type["DuckDbClient"]: from dlt.destinations.impl.duckdb.duck import DuckDbClient diff --git a/dlt/destinations/impl/dummy/factory.py b/dlt/destinations/impl/dummy/factory.py index 1c55990b45..413002ed2d 100644 --- a/dlt/destinations/impl/dummy/factory.py +++ b/dlt/destinations/impl/dummy/factory.py @@ -1,7 +1,6 @@ import typing as t -from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, Destination, DestinationCapabilitiesContext +from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration, DummyClientCredentials from dlt.destinations.impl.dummy import capabilities @@ -14,7 +13,6 @@ class dummy(Destination): spec = DummyClientConfiguration - @property def capabilities(self) -> DestinationCapabilitiesContext: return capabilities() diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index 150bf0fdd8..4e5aec6b5b 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -2,8 +2,7 @@ from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration from dlt.destinations.impl.filesystem import capabilities 
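+# capabilities() stays in the impl package so it can be imported without the filesystem client dependencies installed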
-from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, Destination +from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.common.storages.configuration import FileSystemCredentials if t.TYPE_CHECKING: @@ -12,9 +11,11 @@ class filesystem(Destination): - capabilities = capabilities() spec = FilesystemDestinationClientConfiguration + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + @property def client_class(self) -> t.Type["FilesystemClient"]: from dlt.destinations.impl.filesystem.filesystem import FilesystemClient diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py index f24f8a646d..7ffc1efa5e 100644 --- a/dlt/destinations/impl/mssql/factory.py +++ b/dlt/destinations/impl/mssql/factory.py @@ -1,7 +1,6 @@ import typing as t -from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, Destination +from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, MsSqlClientConfiguration from dlt.destinations.impl.mssql import capabilities @@ -12,9 +11,11 @@ class mssql(Destination): - capabilities = capabilities() spec = MsSqlClientConfiguration + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + @property def client_class(self) -> t.Type["MsSqlClient"]: from dlt.destinations.impl.mssql.mssql import MsSqlClient diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index d9606ac81d..41d531b5b8 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -1,7 +1,6 @@ import typing as t -from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, Destination +from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.destinations.impl.postgres.configuration import PostgresCredentials, PostgresClientConfiguration from dlt.destinations.impl.postgres import capabilities @@ -12,9 +11,11 @@ class postgres(Destination): - capabilities = capabilities() spec = PostgresClientConfiguration + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + @property def client_class(self) -> t.Type["PostgresClient"]: from dlt.destinations.impl.postgres.postgres import PostgresClient diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index d737aed586..58298f890e 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -2,9 +2,7 @@ from dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials, SnowflakeClientConfiguration from dlt.destinations.impl.snowflake import capabilities -from dlt.common.configuration import with_config, known_sections -from dlt.common.destination.reference import DestinationClientConfiguration, Destination -from dlt.common.destination import DestinationCapabilitiesContext +from dlt.common.destination import Destination, DestinationCapabilitiesContext if t.TYPE_CHECKING: from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient @@ -12,9 +10,11 @@ class snowflake(Destination): - capabilities = capabilities() spec = SnowflakeClientConfiguration + 
def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + @property def client_class(self) -> t.Type["SnowflakeClient"]: from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient diff --git a/dlt/load/load.py b/dlt/load/load.py index 76fb842927..0d4cf8cf7f 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -56,14 +56,14 @@ def __init__( def create_storage(self, is_storage_owner: bool) -> LoadStorage: - supported_file_formats = self.capabilities.supported_loader_file_formats + supported_file_formats = self.capabilities().supported_loader_file_formats if self.staging_destination: - supported_file_formats = self.staging_destination.capabilities.supported_loader_file_formats + ["reference"] + supported_file_formats = self.staging_destination.capabilities().supported_loader_file_formats + ["reference"] if isinstance(self.get_destination_client(Schema("test")), WithStagingDataset): supported_file_formats += ["sql"] load_storage = LoadStorage( is_storage_owner, - self.capabilities.preferred_loader_file_format, + self.capabilities().preferred_loader_file_format, supported_file_formats, config=self.config._load_storage_config ) @@ -76,7 +76,7 @@ def get_staging_destination_client(self, schema: Schema) -> JobClientBase: return self.staging_destination.client(schema, self.initial_staging_client_config) def is_staging_destination_job(self, file_path: str) -> bool: - return self.staging_destination is not None and os.path.splitext(file_path)[1][1:] in self.staging_destination.capabilities.supported_loader_file_formats + return self.staging_destination is not None and os.path.splitext(file_path)[1][1:] in self.staging_destination.capabilities().supported_loader_file_formats @contextlib.contextmanager def maybe_with_staging_dataset(self, job_client: JobClientBase, use_staging: bool) -> Iterator[None]: @@ -99,7 +99,7 @@ def w_spool_job(self: "Load", file_path: str, load_id: str, schema: Schema) -> O with (self.get_staging_destination_client(schema) if is_staging_destination_job else job_client) as client: job_info = self.load_storage.parse_job_file_name(file_path) if job_info.file_format not in self.load_storage.supported_file_formats: - raise LoadClientUnsupportedFileFormats(job_info.file_format, self.capabilities.supported_loader_file_formats, file_path) + raise LoadClientUnsupportedFileFormats(job_info.file_format, self.capabilities().supported_loader_file_formats, file_path) logger.info(f"Will load file {file_path} with table name {job_info.table_name}") table = client.get_load_table(job_info.table_name) if table["write_disposition"] not in ["append", "replace", "merge"]: diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 003f13fe3d..307d690b21 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -953,10 +953,10 @@ def _get_destination_capabilities(self) -> DestinationCapabilitiesContext: "normalize", "Please provide `destination` argument to `pipeline`, `run` or `load` method directly or via .dlt config.toml file or environment variable." 
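Call sites change mechanically with the attribute-to-method move: `self.capabilities.supported_loader_file_formats` becomes `self.capabilities().supported_loader_file_formats`, and likewise for the staging destination. The file-format computation in `Load.create_storage` condenses to the following logic (a restatement for readability, not a drop-in replacement; the `WithStagingDataset` check is elided):

    import typing as t

    from dlt.common.destination import Destination

    def storage_file_formats(destination: Destination, staging: Destination = None) -> t.List[str]:
        # formats the load storage must accept for this destination pair
        formats = list(destination.capabilities().supported_loader_file_formats)
        if staging is not None:
            # staging jobs hand the destination "reference" files pointing at staged data
            formats = list(staging.capabilities().supported_loader_file_formats) + ["reference"]
        return formats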
) - return self.destination.capabilities + return self.destination.capabilities() def _get_staging_capabilities(self) -> Optional[DestinationCapabilitiesContext]: - return self.staging.capabilities if self.staging is not None else None + return self.staging.capabilities() if self.staging is not None else None def _validate_pipeline_name(self) -> None: try: @@ -989,7 +989,7 @@ def _set_destinations(self, destination: TDestinationReferenceArg, staging: TDes if destination: self.destination = Destination.from_reference(destination) - if destination and not self.destination.capabilities.supported_loader_file_formats and not staging: + if destination and not self.destination.capabilities().supported_loader_file_formats and not staging: logger.warning(f"The destination {self.destination.name} requires the filesystem staging destination to be set, but it was not provided. Setting it to 'filesystem'.") staging = "filesystem" diff --git a/tests/load/pipeline/test_arrow_loading.py b/tests/load/pipeline/test_arrow_loading.py index 20cca7602c..bd709e764d 100644 --- a/tests/load/pipeline/test_arrow_loading.py +++ b/tests/load/pipeline/test_arrow_loading.py @@ -71,7 +71,7 @@ def some_data(): for row in expected: for i in range(len(row)): if isinstance(row[i], datetime): - row[i] = reduce_pendulum_datetime_precision(row[i], pipeline.destination.capabilities.timestamp_precision) + row[i] = reduce_pendulum_datetime_precision(row[i], pipeline.destination.capabilities().timestamp_precision) load_id = load_info.loads_ids[0] From 3bde2f49ea873382944483ff71abdf55d953d81b Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Mon, 13 Nov 2023 20:21:58 -0500 Subject: [PATCH 08/29] bigquery, athena factories --- dlt/destinations/impl/athena/factory.py | 35 ++++++++++++ dlt/destinations/impl/bigquery/factory.py | 69 +++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 dlt/destinations/impl/athena/factory.py create mode 100644 dlt/destinations/impl/bigquery/factory.py diff --git a/dlt/destinations/impl/athena/factory.py b/dlt/destinations/impl/athena/factory.py new file mode 100644 index 0000000000..70cce94df0 --- /dev/null +++ b/dlt/destinations/impl/athena/factory.py @@ -0,0 +1,35 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration +from dlt.common.configuration.specs import AwsCredentials +from dlt.destinations.impl.athena import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.athena.athena import AthenaClient + + +class athena(Destination): + + spec = AthenaClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["AthenaClient"]: + from dlt.destinations.impl.athena.athena import AthenaClient + + return AthenaClient + + def __init__( + self, + query_result_bucket: t.Optional[str] = None, + credentials: t.Optional[AwsCredentials] = None, + athena_work_group: t.Optional[str] = None, + aws_data_catalog: t.Optional[str] = "awsdatacatalog", + supports_truncate_command: bool = False, + force_iceberg: bool = False, + **kwargs: t.Any, + ) -> None: + super().__init__(**kwargs) diff --git a/dlt/destinations/impl/bigquery/factory.py b/dlt/destinations/impl/bigquery/factory.py new file mode 100644 index 0000000000..e81711f1e9 --- /dev/null +++ b/dlt/destinations/impl/bigquery/factory.py @@ -0,0 +1,69 @@ +# import typing as t + +# from 
dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials, SnowflakeClientConfiguration +# from dlt.destinations.impl.snowflake import capabilities +# from dlt.common.destination import Destination, DestinationCapabilitiesContext +# from dlt.common.destination import DestinationCapabilitiesContext + +# if t.TYPE_CHECKING: +# from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient + + +# class snowflake(Destination): + +# spec = SnowflakeClientConfiguration + +# def capabilities(self) -> DestinationCapabilitiesContext: +# return capabilities() + +# @property +# def client_class(self) -> t.Type["SnowflakeClient"]: +# from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient + +# return SnowflakeClient + +# def __init__( +# self, +# credentials: SnowflakeCredentials = None, +# stage_name: t.Optional[str] = None, +# keep_staged_files: bool = True, +# **kwargs: t.Any, +# ) -> None: +# super().__init__(credentials=credentials, stage_name=stage_name, keep_staged_files=keep_staged_files, **kwargs) + + +import typing as t + +from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration +from dlt.common.configuration.specs import GcpServiceAccountCredentials +from dlt.destinations.impl.bigquery import capabilities +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +if t.TYPE_CHECKING: + from dlt.destinations.impl.bigquery.bigquery import BigQueryClient + + +class bigquery(Destination): + + spec = BigQueryClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["BigQueryClient"]: + from dlt.destinations.impl.bigquery.bigquery import BigQueryClient + + return BigQueryClient + + def __init__( + self, + credentials: t.Optional[GcpServiceAccountCredentials] = None, + location: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + super().__init__( + credentials=credentials, + location=location, + **kwargs + ) From 0d59d51acedbe61ba0b7a4e3878ab94edd706815 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Mon, 13 Nov 2023 20:47:52 -0500 Subject: [PATCH 09/29] Add rest of factories --- dlt/common/destination/__init__.py | 1 + dlt/destinations/__init__.py | 10 +++ dlt/destinations/impl/motherduck/factory.py | 31 ++++++++++ dlt/destinations/impl/qdrant/factory.py | 30 +++++++++ dlt/destinations/impl/redshift/factory.py | 67 +++++++++++++++++++++ dlt/destinations/impl/weaviate/factory.py | 30 +++++++++ tests/load/utils.py | 2 +- 7 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 dlt/destinations/impl/motherduck/factory.py create mode 100644 dlt/destinations/impl/qdrant/factory.py create mode 100644 dlt/destinations/impl/redshift/factory.py create mode 100644 dlt/destinations/impl/weaviate/factory.py diff --git a/dlt/common/destination/__init__.py b/dlt/common/destination/__init__.py index f0b22fa67a..4efe764715 100644 --- a/dlt/common/destination/__init__.py +++ b/dlt/common/destination/__init__.py @@ -6,4 +6,5 @@ "TLoaderFileFormat", "ALL_SUPPORTED_FILE_FORMATS", "TDestinationReferenceArg", + "Destination", ] diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index 04bc43bc1a..15fe083813 100644 --- a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -4,6 +4,11 @@ from dlt.destinations.impl.duckdb.factory import duckdb from dlt.destinations.impl.dummy.factory import dummy from dlt.destinations.impl.mssql.factory import mssql +from 
dlt.destinations.impl.bigquery.factory import bigquery +from dlt.destinations.impl.athena.factory import athena +from dlt.destinations.impl.redshift.factory import redshift +from dlt.destinations.impl.qdrant.factory import qdrant +from dlt.destinations.impl.motherduck.factory import motherduck __all__ = [ @@ -13,4 +18,9 @@ "duckdb", "dummy", "mssql", + "bigquery", + "athena", + "redshift", + "qdrant", + "motherduck", ] diff --git a/dlt/destinations/impl/motherduck/factory.py b/dlt/destinations/impl/motherduck/factory.py new file mode 100644 index 0000000000..6c94642018 --- /dev/null +++ b/dlt/destinations/impl/motherduck/factory.py @@ -0,0 +1,31 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials, MotherDuckClientConfiguration +from dlt.destinations.impl.motherduck import capabilities + +if t.TYPE_CHECKING: + from duckdb import DuckDBPyConnection + from dlt.destinations.impl.motherduck.motherduck import MotherDuckClient + + +class motherduck(Destination): + + spec = MotherDuckClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["MotherDuckClient"]: + from dlt.destinations.impl.motherduck.motherduck import MotherDuckClient + + return MotherDuckClient + + def __init__( + self, + credentials: t.Union[MotherDuckCredentials, str, "DuckDBPyConnection"] = None, + create_indexes: bool = False, + **kwargs: t.Any, + ) -> None: + super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs) diff --git a/dlt/destinations/impl/qdrant/factory.py b/dlt/destinations/impl/qdrant/factory.py new file mode 100644 index 0000000000..78c533b0aa --- /dev/null +++ b/dlt/destinations/impl/qdrant/factory.py @@ -0,0 +1,30 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.qdrant.configuration import QdrantCredentials, QdrantClientConfiguration +from dlt.destinations.impl.qdrant import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient + + +class qdrant(Destination): + + spec = QdrantClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["QdrantClient"]: + from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient + + return QdrantClient + + def __init__( + self, + credentials: t.Optional[QdrantCredentials] = None, + **kwargs: t.Any, + ) -> None: + super().__init__(credentials=credentials, **kwargs) diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py new file mode 100644 index 0000000000..59b2267807 --- /dev/null +++ b/dlt/destinations/impl/redshift/factory.py @@ -0,0 +1,67 @@ +# import typing as t + +# from dlt.common.destination import Destination, DestinationCapabilitiesContext + +# from dlt.destinations.impl.postgres.configuration import PostgresCredentials, PostgresClientConfiguration +# from dlt.destinations.impl.postgres import capabilities + +# if t.TYPE_CHECKING: +# from dlt.destinations.impl.postgres.postgres import PostgresClient + + +# class postgres(Destination): + +# spec = PostgresClientConfiguration + +# def capabilities(self) -> DestinationCapabilitiesContext: +# return capabilities() + +# @property +# def client_class(self) -> t.Type["PostgresClient"]: +# 
from dlt.destinations.impl.postgres.postgres import PostgresClient + +# return PostgresClient + +# def __init__( +# self, +# credentials: t.Union[PostgresCredentials, str] = None, +# create_indexes: bool = True, +# **kwargs: t.Any, +# ) -> None: +# super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs) + + +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.redshift.configuration import RedshiftCredentials, RedshiftClientConfiguration +from dlt.destinations.impl.redshift import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.redshift.redshift import RedshiftClient + + +class redshift(Destination): + + spec = RedshiftClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["RedshiftClient"]: + from dlt.destinations.impl.redshift.redshift import RedshiftClient + + return RedshiftClient + + def __init__( + self, + credentials: t.Union[RedshiftCredentials, str] = None, + create_indexes: bool = True, + staging_iam_role: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + super().__init__( + credentials=credentials, create_indexes=create_indexes, staging_iam_role=staging_iam_role, **kwargs + ) diff --git a/dlt/destinations/impl/weaviate/factory.py b/dlt/destinations/impl/weaviate/factory.py new file mode 100644 index 0000000000..41818de3f8 --- /dev/null +++ b/dlt/destinations/impl/weaviate/factory.py @@ -0,0 +1,30 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext + +from dlt.destinations.impl.weaviate.configuration import WeaviateCredentials, WeaviateClientConfiguration +from dlt.destinations.impl.weaviate import capabilities + +if t.TYPE_CHECKING: + from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient + + +class weaviate(Destination): + + spec = WeaviateClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["WeaviateClient"]: + from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient + + return WeaviateClient + + def __init__( + self, + credentials: t.Optional[WeaviateCredentials] = None, + **kwargs: t.Any, + ) -> None: + super().__init__(credentials=credentials, **kwargs) diff --git a/tests/load/utils.py b/tests/load/utils.py index 098c5a5509..f591f51585 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -237,7 +237,7 @@ def yield_client( if default_config_values is not None: # apply the values to credentials, if dict is provided it will be used as default - dest_config.credentials = default_config_values # type: ignore[assignment] + # dest_config.credentials = default_config_values # type: ignore[assignment] # also apply to config dest_config.update(default_config_values) # get event default schema From 3ee262dddad6fe3256593d94c94264df3c18fed7 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Mon, 13 Nov 2023 21:03:30 -0500 Subject: [PATCH 10/29] Cleanup --- dlt/destinations/impl/athena/__init__.py | 21 ------------ dlt/destinations/impl/bigquery/__init__.py | 24 ------------- dlt/destinations/impl/bigquery/factory.py | 2 +- dlt/destinations/impl/dummy/__init__.py | 15 -------- dlt/destinations/impl/filesystem/__init__.py | 24 ------------- dlt/destinations/impl/motherduck/__init__.py | 24 ------------- dlt/destinations/impl/qdrant/__init__.py | 35 
------------------- dlt/destinations/impl/redshift/__init__.py | 24 ------------- dlt/destinations/impl/weaviate/__init__.py | 36 -------------------- 9 files changed, 1 insertion(+), 204 deletions(-) diff --git a/dlt/destinations/impl/athena/__init__.py b/dlt/destinations/impl/athena/__init__.py index 10157a4a87..9f0b829819 100644 --- a/dlt/destinations/impl/athena/__init__.py +++ b/dlt/destinations/impl/athena/__init__.py @@ -1,18 +1,7 @@ -from typing import Type - from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.schema.schema import Schema from dlt.common.data_writers.escape import escape_athena_identifier from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.athena.configuration import AthenaClientConfiguration -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration - -@with_config(spec=AthenaClientConfiguration, sections=(known_sections.DESTINATION, "athena",)) -def _configure(config: AthenaClientConfiguration = config.value) -> AthenaClientConfiguration: - return config def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -37,13 +26,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.timestamp_precision = 3 caps.supports_truncate_command = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.athena.athena import AthenaClient - return AthenaClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return AthenaClientConfiguration diff --git a/dlt/destinations/impl/bigquery/__init__.py b/dlt/destinations/impl/bigquery/__init__.py index e694cccc41..1304bd72bb 100644 --- a/dlt/destinations/impl/bigquery/__init__.py +++ b/dlt/destinations/impl/bigquery/__init__.py @@ -1,20 +1,7 @@ -from typing import Type from dlt.common.data_writers.escape import escape_bigquery_identifier - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration - - -@with_config(spec=BigQueryClientConfiguration, sections=(known_sections.DESTINATION, "bigquery",)) -def _configure(config: BigQueryClientConfiguration = config.value) -> BigQueryClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -35,14 +22,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.bigquery.bigquery import BigQueryClient - - return 
BigQueryClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return BigQueryClientConfiguration diff --git a/dlt/destinations/impl/bigquery/factory.py b/dlt/destinations/impl/bigquery/factory.py index e81711f1e9..b6a93ba207 100644 --- a/dlt/destinations/impl/bigquery/factory.py +++ b/dlt/destinations/impl/bigquery/factory.py @@ -60,7 +60,7 @@ def __init__( self, credentials: t.Optional[GcpServiceAccountCredentials] = None, location: t.Optional[str] = None, - **kwargs: t.Any, + **kwargs: t.Any, ) -> None: super().__init__( credentials=credentials, diff --git a/dlt/destinations/impl/dummy/__init__.py b/dlt/destinations/impl/dummy/__init__.py index 2c24b3b16f..476523cb8f 100644 --- a/dlt/destinations/impl/dummy/__init__.py +++ b/dlt/destinations/impl/dummy/__init__.py @@ -1,10 +1,6 @@ -from typing import Type - -from dlt.common.schema.schema import Schema from dlt.common.configuration import with_config, known_sections from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.destinations.impl.dummy.configuration import DummyClientConfiguration @@ -30,14 +26,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.dummy.dummy import DummyClient - - return DummyClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return DummyClientConfiguration diff --git a/dlt/destinations/impl/filesystem/__init__.py b/dlt/destinations/impl/filesystem/__init__.py index abe2c4eca9..12e83216cf 100644 --- a/dlt/destinations/impl/filesystem/__init__.py +++ b/dlt/destinations/impl/filesystem/__init__.py @@ -1,29 +1,5 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientDwhWithStagingConfiguration - -from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration - - -@with_config(spec=FilesystemDestinationClientConfiguration, sections=(known_sections.DESTINATION, "filesystem",)) -def _configure(config: FilesystemDestinationClientConfiguration = config.value) -> FilesystemDestinationClientConfiguration: - return config def capabilities() -> DestinationCapabilitiesContext: return DestinationCapabilitiesContext.generic_capabilities("jsonl") - - -def client(schema: Schema, initial_config: DestinationClientDwhWithStagingConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.filesystem.filesystem import FilesystemClient - - return FilesystemClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[FilesystemDestinationClientConfiguration]: - return FilesystemDestinationClientConfiguration diff --git 
a/dlt/destinations/impl/motherduck/__init__.py b/dlt/destinations/impl/motherduck/__init__.py index 4649ab9bf8..74c0e36ef3 100644 --- a/dlt/destinations/impl/motherduck/__init__.py +++ b/dlt/destinations/impl/motherduck/__init__.py @@ -1,20 +1,7 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_postgres_identifier, escape_duckdb_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.motherduck.configuration import MotherDuckClientConfiguration - - -@with_config(spec=MotherDuckClientConfiguration, sections=(known_sections.DESTINATION, "motherduck",)) -def _configure(config: MotherDuckClientConfiguration = config.value) -> MotherDuckClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -35,14 +22,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_truncate_command = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.motherduck.motherduck import MotherDuckClient - - return MotherDuckClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return MotherDuckClientConfiguration diff --git a/dlt/destinations/impl/qdrant/__init__.py b/dlt/destinations/impl/qdrant/__init__.py index 4bdf7f6b9e..1a2c466b14 100644 --- a/dlt/destinations/impl/qdrant/__init__.py +++ b/dlt/destinations/impl/qdrant/__init__.py @@ -1,30 +1,6 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.destination.reference import ( - JobClientBase, - DestinationClientConfiguration, -) from dlt.common.destination import DestinationCapabilitiesContext from dlt.destinations.impl.qdrant.qdrant_adapter import qdrant_adapter -from dlt.destinations.impl.qdrant.configuration import QdrantClientConfiguration - - -@with_config( - spec=QdrantClientConfiguration, - sections=( - known_sections.DESTINATION, - "qdrant", - ), -) -def _configure( - config: QdrantClientConfiguration = config.value, -) -> QdrantClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -40,14 +16,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.supports_ddl_transactions = False return caps - - -def client( - schema: Schema, initial_config: DestinationClientConfiguration = config.value -) -> JobClientBase: - from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient - return QdrantClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[QdrantClientConfiguration]: - return QdrantClientConfiguration diff --git a/dlt/destinations/impl/redshift/__init__.py b/dlt/destinations/impl/redshift/__init__.py index be5052b07b..8a8cae84b4 100644 --- a/dlt/destinations/impl/redshift/__init__.py +++ 
b/dlt/destinations/impl/redshift/__init__.py @@ -1,20 +1,7 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config from dlt.common.data_writers.escape import escape_redshift_identifier, escape_redshift_literal from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import JobClientBase, DestinationClientConfiguration from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE -from dlt.destinations.impl.redshift.configuration import RedshiftClientConfiguration - - -@with_config(spec=RedshiftClientConfiguration, sections=(known_sections.DESTINATION, "redshift",)) -def _configure(config: RedshiftClientConfiguration = config.value) -> RedshiftClientConfiguration: - return config - def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() @@ -36,14 +23,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.alter_add_multi_column = False return caps - - -def client(schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> JobClientBase: - # import client when creating instance so capabilities and config specs can be accessed without dependencies installed - from dlt.destinations.impl.redshift.redshift import RedshiftClient - - return RedshiftClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[DestinationClientConfiguration]: - return RedshiftClientConfiguration diff --git a/dlt/destinations/impl/weaviate/__init__.py b/dlt/destinations/impl/weaviate/__init__.py index 36237702a0..fa37d149d2 100644 --- a/dlt/destinations/impl/weaviate/__init__.py +++ b/dlt/destinations/impl/weaviate/__init__.py @@ -1,29 +1,5 @@ -from typing import Type - -from dlt.common.schema.schema import Schema -from dlt.common.configuration import with_config, known_sections -from dlt.common.configuration.accessors import config -from dlt.common.destination.reference import ( - JobClientBase, - DestinationClientConfiguration, -) from dlt.common.destination import DestinationCapabilitiesContext - from dlt.destinations.impl.weaviate.weaviate_adapter import weaviate_adapter -from dlt.destinations.impl.weaviate.configuration import WeaviateClientConfiguration - - -@with_config( - spec=WeaviateClientConfiguration, - sections=( - known_sections.DESTINATION, - "weaviate", - ), -) -def _configure( - config: WeaviateClientConfiguration = config.value, -) -> WeaviateClientConfiguration: - return config def capabilities() -> DestinationCapabilitiesContext: @@ -41,15 +17,3 @@ def capabilities() -> DestinationCapabilitiesContext: caps.naming_convention = "dlt.destinations.weaviate.naming" return caps - - -def client( - schema: Schema, initial_config: DestinationClientConfiguration = config.value -) -> JobClientBase: - from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient - - return WeaviateClient(schema, _configure(initial_config)) # type: ignore - - -def spec() -> Type[WeaviateClientConfiguration]: - return WeaviateClientConfiguration From 9243b2ff50977bddcea65b49151de32d7088197c Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Mon, 13 Nov 2023 21:54:13 -0500 Subject: [PATCH 11/29] Destination type vars --- dlt/common/destination/__init__.py | 3 ++- dlt/common/destination/reference.py | 18 +++++++++++------- dlt/common/pipeline.py | 4 ++-- dlt/destinations/__init__.py | 2 ++ 
dlt/destinations/impl/athena/factory.py | 2 +- dlt/destinations/impl/bigquery/factory.py | 2 +- dlt/destinations/impl/duckdb/factory.py | 2 +- dlt/destinations/impl/dummy/factory.py | 2 +- dlt/destinations/impl/filesystem/factory.py | 2 +- dlt/destinations/impl/motherduck/factory.py | 2 +- dlt/destinations/impl/mssql/factory.py | 2 +- dlt/destinations/impl/postgres/factory.py | 2 +- dlt/destinations/impl/qdrant/factory.py | 2 +- dlt/destinations/impl/redshift/factory.py | 2 +- dlt/destinations/impl/snowflake/factory.py | 2 +- dlt/destinations/impl/weaviate/factory.py | 2 +- dlt/load/load.py | 6 +++--- dlt/pipeline/__init__.py | 2 +- dlt/pipeline/pipeline.py | 12 ++++++------ .../dbt_tests/test_runner_dbt_versions.py | 2 +- tests/load/filesystem/utils.py | 4 ++-- tests/load/test_dummy_client.py | 4 ++-- tests/load/weaviate/test_weaviate_client.py | 9 +++++---- 23 files changed, 49 insertions(+), 41 deletions(-) diff --git a/dlt/common/destination/__init__.py b/dlt/common/destination/__init__.py index 4efe764715..4857851fa9 100644 --- a/dlt/common/destination/__init__.py +++ b/dlt/common/destination/__init__.py @@ -1,5 +1,5 @@ from dlt.common.destination.capabilities import DestinationCapabilitiesContext, TLoaderFileFormat, ALL_SUPPORTED_FILE_FORMATS -from dlt.common.destination.reference import TDestinationReferenceArg, Destination +from dlt.common.destination.reference import TDestinationReferenceArg, Destination, TDestination __all__ = [ "DestinationCapabilitiesContext", @@ -7,4 +7,5 @@ "ALL_SUPPORTED_FILE_FORMATS", "TDestinationReferenceArg", "Destination", + "TDestination", ] diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 8c19aac35b..5da86f1812 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod, abstractproperty from importlib import import_module from types import TracebackType, ModuleType -from typing import ClassVar, Final, Optional, NamedTuple, Literal, Sequence, Iterable, Type, Protocol, Union, TYPE_CHECKING, cast, List, ContextManager, Dict, Any, Callable, TypeVar +from typing import ClassVar, Final, Optional, NamedTuple, Literal, Sequence, Iterable, Type, Protocol, Union, TYPE_CHECKING, cast, List, ContextManager, Dict, Any, Callable, TypeVar, Generic from contextlib import contextmanager import datetime # noqa: 251 from copy import deepcopy @@ -26,6 +26,7 @@ TLoaderReplaceStrategy = Literal["truncate-and-insert", "insert-from-staging", "staging-optimized"] TDestinationConfig = TypeVar("TDestinationConfig", bound="DestinationClientConfiguration") +TDestinationClient = TypeVar("TDestinationClient", bound="JobClientBase") class StorageSchemaInfo(NamedTuple): @@ -406,7 +407,7 @@ def should_truncate_table_before_load_on_staging_destination(self, table: TTable # return destination.split(".")[-1] # type: ignore -class Destination(ABC): +class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]): """A destination factory that can be partially pre-configured with credentials and other config params. """ @@ -418,7 +419,7 @@ def __init__(self, **kwargs: Any) -> None: @property @abstractmethod - def spec(self) -> Type[DestinationClientConfiguration]: + def spec(self) -> Type[TDestinationConfig]: """Returns the destination configuration spec""" ... 
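The two type variables bound here (`TDestinationConfig` to `DestinationClientConfiguration`, `TDestinationClient` to `JobClientBase`) let each concrete factory carry its own config and client types, so a checker sees `duckdb(...).client(schema)` as returning `DuckDbClient` rather than a bare `JobClientBase`, while the `TDestination` alias keeps an erased form for code that handles destinations generically. A reduced sketch of how the parameters flow, with stand-in class names and far fewer members than the real base:

    from abc import ABC, abstractmethod
    from typing import Generic, Type, TypeVar

    class ClientConfig: ...                  # stands in for DestinationClientConfiguration

    class JobClient:                         # stands in for JobClientBase
        def __init__(self, config: ClientConfig) -> None:
            self.config = config

    TConfig = TypeVar("TConfig", bound=ClientConfig)
    TClient = TypeVar("TClient", bound=JobClient)

    class DestinationBase(ABC, Generic[TConfig, TClient]):
        spec: Type[TConfig]

        @property
        @abstractmethod
        def client_class(self) -> Type[TClient]: ...

        def client(self, config: TConfig) -> TClient:
            # the concrete client type flows through to the caller
            return self.client_class(config)

    class DuckConfig(ClientConfig): ...

    class DuckClient(JobClient): ...

    class duck(DestinationBase[DuckConfig, DuckClient]):
        spec = DuckConfig

        @property
        def client_class(self) -> Type[DuckClient]:
            return DuckClient

    client = duck().client(DuckConfig())     # inferred as DuckClient

`Destination.from_reference` then recovers a concrete factory from a plain string, either a bare destination name or a full import path such as `dlt.destinations.postgres`, as the surrounding hunks show.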
@@ -433,7 +434,7 @@ def name(self) -> str: @property @abstractmethod - def client_class(self) -> Type[JobClientBase]: + def client_class(self) -> Type[TDestinationClient]: """Returns the client class""" ... @@ -456,7 +457,7 @@ def to_name(ref: TDestinationReferenceArg) -> str: return ref.name @staticmethod - def from_reference(ref: TDestinationReferenceArg, credentials: Optional[CredentialsConfiguration] = None, **kwargs: Any) -> Optional["Destination"]: + def from_reference(ref: TDestinationReferenceArg, credentials: Optional[CredentialsConfiguration] = None, **kwargs: Any) -> Optional["Destination[DestinationClientConfiguration, JobClientBase]"]: """Instantiate destination from str reference. The ref can be a destination name or import path pointing to a destination class (e.g. `dlt.destinations.postgres`) """ @@ -477,14 +478,14 @@ def from_reference(ref: TDestinationReferenceArg, credentials: Optional[Credenti raise UnknownDestinationModule(ref) from e try: - factory: Type[Destination] = getattr(dest_module, attr_name) + factory: Type[Destination[DestinationClientConfiguration, JobClientBase]] = getattr(dest_module, attr_name) except AttributeError as e: raise InvalidDestinationReference(ref) from e if credentials: kwargs["credentials"] = credentials return factory(**kwargs) - def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": + def client(self, schema: Schema, initial_config: TDestinationConfig = config.value) -> TDestinationClient: # Create merged config with the pipeline initial cfg and the partial config of this instance cfg = self.spec( **dict( @@ -493,3 +494,6 @@ def client(self, schema: Schema, initial_config: DestinationClientConfiguration ) ) return self.client_class(schema, self.configuration(cfg)) + + +TDestination = Destination[DestinationClientConfiguration, JobClientBase] diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index 515ae9c3cf..ddd9003799 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -14,7 +14,7 @@ from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.common.configuration.paths import get_dlt_data_dir from dlt.common.configuration.specs import RunConfiguration -from dlt.common.destination import Destination, TDestinationReferenceArg +from dlt.common.destination import Destination, TDestinationReferenceArg, TDestination from dlt.common.exceptions import DestinationHasFailedJobs, PipelineStateNotAvailable, ResourceNameNotAvailable, SourceSectionNotAvailable from dlt.common.schema import Schema from dlt.common.schema.typing import TColumnNames, TColumnSchema, TWriteDisposition @@ -177,7 +177,7 @@ class SupportsPipeline(Protocol): """Name of the pipeline""" default_schema_name: str """Name of the default schema""" - destination: Destination + destination: TDestination """The destination reference which is ModuleType. 
`destination.__name__` returns the name string""" dataset_name: str """Name of the dataset to which pipeline will be loaded to""" diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index 15fe083813..980c4ce7f2 100644 --- a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -9,6 +9,7 @@ from dlt.destinations.impl.redshift.factory import redshift from dlt.destinations.impl.qdrant.factory import qdrant from dlt.destinations.impl.motherduck.factory import motherduck +from dlt.destinations.impl.weaviate.factory import weaviate __all__ = [ @@ -23,4 +24,5 @@ "redshift", "qdrant", "motherduck", + "weaviate", ] diff --git a/dlt/destinations/impl/athena/factory.py b/dlt/destinations/impl/athena/factory.py index 70cce94df0..e66f8041d1 100644 --- a/dlt/destinations/impl/athena/factory.py +++ b/dlt/destinations/impl/athena/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.athena.athena import AthenaClient -class athena(Destination): +class athena(Destination[AthenaClientConfiguration, "AthenaClient"]): spec = AthenaClientConfiguration diff --git a/dlt/destinations/impl/bigquery/factory.py b/dlt/destinations/impl/bigquery/factory.py index b6a93ba207..7b4ade37e4 100644 --- a/dlt/destinations/impl/bigquery/factory.py +++ b/dlt/destinations/impl/bigquery/factory.py @@ -43,7 +43,7 @@ from dlt.destinations.impl.bigquery.bigquery import BigQueryClient -class bigquery(Destination): +class bigquery(Destination[BigQueryClientConfiguration, "BigQueryClient"]): spec = BigQueryClientConfiguration diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py index 22e3c9e819..c542239740 100644 --- a/dlt/destinations/impl/duckdb/factory.py +++ b/dlt/destinations/impl/duckdb/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.duckdb.duck import DuckDbClient -class duckdb(Destination): +class duckdb(Destination[DuckDbClientConfiguration, "DuckDbClient"]): spec = DuckDbClientConfiguration diff --git a/dlt/destinations/impl/dummy/factory.py b/dlt/destinations/impl/dummy/factory.py index 413002ed2d..265c77b0f4 100644 --- a/dlt/destinations/impl/dummy/factory.py +++ b/dlt/destinations/impl/dummy/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.dummy.dummy import DummyClient -class dummy(Destination): +class dummy(Destination[DummyClientConfiguration, "DummyClient"]): spec = DummyClientConfiguration diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index 4e5aec6b5b..95e74b85a9 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.filesystem.filesystem import FilesystemClient -class filesystem(Destination): +class filesystem(Destination[FilesystemDestinationClientConfiguration, "FilesystemClient"]): spec = FilesystemDestinationClientConfiguration diff --git a/dlt/destinations/impl/motherduck/factory.py b/dlt/destinations/impl/motherduck/factory.py index 6c94642018..7c7301043d 100644 --- a/dlt/destinations/impl/motherduck/factory.py +++ b/dlt/destinations/impl/motherduck/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.motherduck.motherduck import MotherDuckClient -class motherduck(Destination): +class motherduck(Destination[MotherDuckClientConfiguration, "MotherDuckClient"]): spec = MotherDuckClientConfiguration diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py index 7ffc1efa5e..542be00e63 100644 --- a/dlt/destinations/impl/mssql/factory.py +++ 
b/dlt/destinations/impl/mssql/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.mssql.mssql import MsSqlClient -class mssql(Destination): +class mssql(Destination[MsSqlClientConfiguration, "MsSqlClient"]): spec = MsSqlClientConfiguration diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index 41d531b5b8..208e13efd6 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.postgres.postgres import PostgresClient -class postgres(Destination): +class postgres(Destination[PostgresClientConfiguration, "PostgresClient"]): spec = PostgresClientConfiguration diff --git a/dlt/destinations/impl/qdrant/factory.py b/dlt/destinations/impl/qdrant/factory.py index 78c533b0aa..5cb436832c 100644 --- a/dlt/destinations/impl/qdrant/factory.py +++ b/dlt/destinations/impl/qdrant/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.qdrant.qdrant_client import QdrantClient -class qdrant(Destination): +class qdrant(Destination[QdrantClientConfiguration, "QdrantClient"]): spec = QdrantClientConfiguration diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py index 59b2267807..de8964a2c0 100644 --- a/dlt/destinations/impl/redshift/factory.py +++ b/dlt/destinations/impl/redshift/factory.py @@ -42,7 +42,7 @@ from dlt.destinations.impl.redshift.redshift import RedshiftClient -class redshift(Destination): +class redshift(Destination[RedshiftClientConfiguration, "RedshiftClient"]): spec = RedshiftClientConfiguration diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index 58298f890e..9800461a5e 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -8,7 +8,7 @@ from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient -class snowflake(Destination): +class snowflake(Destination[SnowflakeClientConfiguration, "SnowflakeClient"]): spec = SnowflakeClientConfiguration diff --git a/dlt/destinations/impl/weaviate/factory.py b/dlt/destinations/impl/weaviate/factory.py index 41818de3f8..c89ba22f5d 100644 --- a/dlt/destinations/impl/weaviate/factory.py +++ b/dlt/destinations/impl/weaviate/factory.py @@ -9,7 +9,7 @@ from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient -class weaviate(Destination): +class weaviate(Destination[WeaviateClientConfiguration, "WeaviateClient"]): spec = WeaviateClientConfiguration diff --git a/dlt/load/load.py b/dlt/load/load.py index 0d4cf8cf7f..9015089857 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -20,7 +20,7 @@ from dlt.common.schema import Schema, TSchemaTables from dlt.common.schema.typing import TTableSchema, TWriteDisposition from dlt.common.storages import LoadStorage -from dlt.common.destination.reference import DestinationClientDwhConfiguration, FollowupJob, JobClientBase, WithStagingDataset, Destination, LoadJob, NewLoadJob, TLoadJobState, DestinationClientConfiguration, SupportsStagingDestination +from dlt.common.destination.reference import DestinationClientDwhConfiguration, FollowupJob, JobClientBase, WithStagingDataset, Destination, LoadJob, NewLoadJob, TLoadJobState, DestinationClientConfiguration, SupportsStagingDestination, TDestination from dlt.destinations.job_impl import EmptyLoadJob @@ -34,8 +34,8 @@ class Load(Runnable[Executor]): @with_config(spec=LoaderConfiguration, sections=(known_sections.LOAD,)) def __init__( self, - destination: Destination, - 
staging_destination: Destination = None, + destination: TDestination, + staging_destination: TDestination = None, collector: Collector = NULL_COLLECTOR, is_storage_owner: bool = False, config: LoaderConfiguration = config.value, diff --git a/dlt/pipeline/__init__.py b/dlt/pipeline/__init__.py index 774baa90f0..fdb39d25b1 100644 --- a/dlt/pipeline/__init__.py +++ b/dlt/pipeline/__init__.py @@ -7,7 +7,7 @@ from dlt.common.configuration import with_config from dlt.common.configuration.container import Container from dlt.common.configuration.inject import get_orig_args, last_config -from dlt.common.destination.reference import Destination, TDestinationReferenceArg +from dlt.common.destination import Destination, TDestinationReferenceArg from dlt.common.pipeline import LoadInfo, PipelineContext, get_dlt_pipelines_dir from dlt.pipeline.configuration import PipelineConfiguration, ensure_correct_pipeline_kwargs diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 307d690b21..cf90cf1ff2 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -24,7 +24,7 @@ from dlt.common.typing import TFun, TSecretValue, is_optional_type from dlt.common.runners import pool_runner as runner from dlt.common.storages import LiveSchemaStorage, NormalizeStorage, LoadStorage, SchemaStorage, FileStorage, NormalizeStorageConfiguration, SchemaStorageConfiguration, LoadStorageConfiguration -from dlt.common.destination import DestinationCapabilitiesContext +from dlt.common.destination import DestinationCapabilitiesContext, TDestination from dlt.common.destination.reference import (DestinationClientDwhConfiguration, WithStateSync, Destination, JobClientBase, DestinationClientConfiguration, TDestinationReferenceArg, DestinationClientStagingConfiguration, DestinationClientStagingConfiguration, DestinationClientDwhWithStagingConfiguration) @@ -166,8 +166,8 @@ class Pipeline(SupportsPipeline): """A directory where the pipelines' working directories are created""" working_dir: str """A working directory of the pipeline""" - destination: Destination = None - staging: Destination = None + destination: TDestination = None + staging: TDestination = None """The destination reference which is ModuleType. 
`destination.name` returns the name string""" dataset_name: str = None """Name of the dataset to which pipeline will be loaded to""" @@ -183,8 +183,8 @@ def __init__( pipeline_name: str, pipelines_dir: str, pipeline_salt: TSecretValue, - destination: Destination, - staging: Destination, + destination: TDestination, + staging: TDestination, dataset_name: str, credentials: Any, import_schema_path: str, @@ -879,7 +879,7 @@ def _extract_source(self, storage: ExtractorStorage, source: DltSource, max_para return extract_id - def _get_destination_client_initial_config(self, destination: Destination = None, credentials: Any = None, as_staging: bool = False) -> DestinationClientConfiguration: + def _get_destination_client_initial_config(self, destination: TDestination = None, credentials: Any = None, as_staging: bool = False) -> DestinationClientConfiguration: destination = destination or self.destination if not destination: raise PipelineConfigMissing( diff --git a/tests/helpers/dbt_tests/test_runner_dbt_versions.py b/tests/helpers/dbt_tests/test_runner_dbt_versions.py index b369c5e64c..1037908e59 100644 --- a/tests/helpers/dbt_tests/test_runner_dbt_versions.py +++ b/tests/helpers/dbt_tests/test_runner_dbt_versions.py @@ -15,7 +15,7 @@ from dlt.common.typing import AnyFun from dlt.destinations.impl.postgres.postgres import PostgresClient -from dlt.destinations.impl.bigquery import BigQueryClientConfiguration +from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration from dlt.helpers.dbt.configuration import DBTRunnerConfiguration from dlt.helpers.dbt.exceptions import PrerequisitesException, DBTProcessingError from dlt.helpers.dbt import package_runner, create_venv, _create_dbt_deps, _default_profile_name, DEFAULT_DBT_VERSION diff --git a/tests/load/filesystem/utils.py b/tests/load/filesystem/utils.py index 6dfd042a4e..8186e82c3b 100644 --- a/tests/load/filesystem/utils.py +++ b/tests/load/filesystem/utils.py @@ -5,7 +5,7 @@ from dlt.load import Load from dlt.common.configuration.container import Container from dlt.common.configuration.specs.config_section_context import ConfigSectionContext -from dlt.common.destination.reference import Destination, LoadJob +from dlt.common.destination.reference import Destination, LoadJob, TDestination from dlt.destinations import filesystem from dlt.destinations.impl.filesystem.filesystem import FilesystemClient from dlt.destinations.job_impl import EmptyLoadJob @@ -13,7 +13,7 @@ def setup_loader(dataset_name: str) -> Load: - destination: Destination = filesystem() + destination: TDestination = filesystem() # type: ignore[assignment] config = filesystem.spec(dataset_name=dataset_name) # setup loader with Container().injectable_context(ConfigSectionContext(sections=('filesystem',))): diff --git a/tests/load/test_dummy_client.py b/tests/load/test_dummy_client.py index a959f6d960..231095dc80 100644 --- a/tests/load/test_dummy_client.py +++ b/tests/load/test_dummy_client.py @@ -11,7 +11,7 @@ from dlt.common.storages import FileStorage, LoadStorage from dlt.common.storages.load_storage import JobWithUnsupportedWriterException from dlt.common.utils import uniq_id -from dlt.common.destination.reference import Destination, LoadJob +from dlt.common.destination.reference import Destination, LoadJob, TDestination from dlt.load import Load from dlt.destinations.job_impl import EmptyLoadJob @@ -445,7 +445,7 @@ def run_all(load: Load) -> None: def setup_loader(delete_completed_jobs: bool = False, client_config: DummyClientConfiguration = None) -> 
Load: # reset jobs for a test dummy_impl.JOBS = {} - destination: Destination = dummy() + destination: TDestination = dummy() # type: ignore[assignment] client_config = client_config or DummyClientConfiguration(loader_file_format="jsonl") # patch destination to provide client_config # destination.client = lambda schema: dummy_impl.DummyClient(schema, client_config) diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py index 3ae739c90c..49bfd72637 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -8,7 +8,7 @@ from dlt.common.utils import uniq_id from dlt.common.schema.typing import TWriteDisposition, TColumnSchema, TTableSchemaColumns -from dlt.destinations.impl import weaviate +from dlt.destinations import weaviate from dlt.destinations.impl.weaviate.exceptions import PropertyNameConflict from dlt.destinations.impl.weaviate.weaviate_client import WeaviateClient @@ -27,9 +27,10 @@ def drop_weaviate_schema() -> Iterator[None]: def get_client_instance(schema: Schema) -> WeaviateClient: - config = weaviate.spec()(dataset_name="ClientTest" + uniq_id()) - with Container().injectable_context(ConfigSectionContext(sections=('destination', 'weaviate'))): - return weaviate.client(schema, config) # type: ignore[return-value] + dest = weaviate(dataset_name="ClientTest" + uniq_id()) + return dest.client(schema, dest.spec()) + # with Container().injectable_context(ConfigSectionContext(sections=('destination', 'weaviate'))): + # return dest.client(schema, config) @pytest.fixture(scope='function') From 9ad561cbac949e32469bc8d9da986dd6c338bab7 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Mon, 13 Nov 2023 21:55:05 -0500 Subject: [PATCH 12/29] Cleanup --- dlt/common/destination/reference.py | 57 ----------------------------- 1 file changed, 57 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 5da86f1812..8c01de61f8 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -350,63 +350,6 @@ def should_truncate_table_before_load_on_staging_destination(self, table: TTable TDestinationReferenceArg = Union[str, "Destination", None] -# class DestinationReference(Protocol): -# __name__: str -# """Name of the destination""" - -# def capabilities(self) -> DestinationCapabilitiesContext: -# """Destination capabilities ie. supported loader file formats, identifier name lengths, naming conventions, escape function etc.""" - -# def client(self, schema: Schema, initial_config: DestinationClientConfiguration = config.value) -> "JobClientBase": -# """A job client responsible for starting and resuming load jobs""" - -# def spec(self) -> Type[DestinationClientConfiguration]: -# """A spec of destination configuration that also contains destination credentials""" - -# @staticmethod -# def from_name(destination: TDestinationReferenceArg) -> "DestinationReference": -# if destination is None: -# return None - -# # if destination is a str, get destination reference by dynamically importing module -# if isinstance(destination, str): -# try: -# if "." in destination: -# # this is full module name -# destination_ref = cast(DestinationReference, import_module(destination)) -# else: -# # from known location -# destination_ref = cast(DestinationReference, import_module(f"dlt.destinations.impl.{destination}")) -# except ImportError: -# if "." 
in destination: -# raise UnknownDestinationModule(destination) -# else: -# # allow local external module imported without dot -# try: -# destination_ref = cast(DestinationReference, import_module(destination)) -# except ImportError: -# raise UnknownDestinationModule(destination) -# else: -# destination_ref = cast(DestinationReference, destination) - -# # make sure the reference is correct -# try: -# c = destination_ref.spec() -# c.credentials -# except Exception: -# raise InvalidDestinationReference(destination) - -# return destination_ref - -# @staticmethod -# def to_name(destination: TDestinationReferenceArg) -> str: -# if isinstance(destination, DestinationFactory): -# return destination.name -# if isinstance(destination, ModuleType): -# return get_module_name(destination) -# return destination.split(".")[-1] # type: ignore - - class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]): """A destination factory that can be partially pre-configured with credentials and other config params. From 007d7a3dcceb53d7496011c5e7bfcd8c259d9b21 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 15:03:59 -0500 Subject: [PATCH 13/29] Fix test --- tests/common/test_destination.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/common/test_destination.py b/tests/common/test_destination.py index b1c85bb91f..53cf8185d7 100644 --- a/tests/common/test_destination.py +++ b/tests/common/test_destination.py @@ -29,7 +29,7 @@ def test_import_all_destinations() -> None: dest = Destination.from_reference(dest_name) assert dest.name == dest_name dest.spec() - assert isinstance(dest.capabilities, DestinationCapabilitiesContext) + assert isinstance(dest.capabilities(), DestinationCapabilitiesContext) def test_normalize_dataset_name() -> None: From 62b5a572dd52d0678be37d0e22ab46065db99e3b Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 15:04:07 -0500 Subject: [PATCH 14/29] Create initial config from non-defaults only --- dlt/common/destination/reference.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 8c01de61f8..ef770512ef 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -5,6 +5,7 @@ from contextlib import contextmanager import datetime # noqa: 251 from copy import deepcopy +import inspect from dlt.common import logger from dlt.common.exceptions import IdentifierTooLongException, InvalidDestinationReference, UnknownDestinationModule @@ -358,7 +359,16 @@ class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]): initial_config: DestinationClientConfiguration def __init__(self, **kwargs: Any) -> None: - self.initial_config = self.spec(**kwargs) + # Create initial unresolved destination config + # Argument defaults are filtered out here because we only want arguments passed explicitly + # to supersede config from the environment or pipeline args + sig = inspect.signature(self.__class__) + params = sig.parameters + config_args = { + k: v for k, v in kwargs.items() + if k not in params or v != params[k].default + } + self.initial_config = self.spec(**config_args) @property @abstractmethod From d77b54abbf2db0ed1a1b46c5fa5d518c83e8e7dc Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 15:34:15 -0500 Subject: [PATCH 15/29] Update naming convention path --- dlt/destinations/impl/weaviate/__init__.py | 2 +- tests/load/weaviate/test_weaviate_client.py | 
2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dlt/destinations/impl/weaviate/__init__.py b/dlt/destinations/impl/weaviate/__init__.py index fa37d149d2..143e0260d2 100644 --- a/dlt/destinations/impl/weaviate/__init__.py +++ b/dlt/destinations/impl/weaviate/__init__.py @@ -14,6 +14,6 @@ def capabilities() -> DestinationCapabilitiesContext: caps.max_text_data_type_length = 8 * 1024 * 1024 caps.is_max_text_data_type_length_in_bytes = False caps.supports_ddl_transactions = False - caps.naming_convention = "dlt.destinations.weaviate.naming" + caps.naming_convention = "dlt.destinations.impl.weaviate.naming" return caps diff --git a/tests/load/weaviate/test_weaviate_client.py b/tests/load/weaviate/test_weaviate_client.py index 49bfd72637..ca9d853d98 100644 --- a/tests/load/weaviate/test_weaviate_client.py +++ b/tests/load/weaviate/test_weaviate_client.py @@ -45,7 +45,7 @@ def ci_client() -> Iterator[WeaviateClient]: def make_client(naming_convention: str) -> Iterator[WeaviateClient]: schema = Schema('test_schema', { - 'names': f"dlt.destinations.weaviate.{naming_convention}", + 'names': f"dlt.destinations.impl.weaviate.{naming_convention}", 'json': None }) _client = get_client_instance(schema) From 0d4ad4978ba1affa8b552475b99e770f4fa2c855 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 18:00:52 -0500 Subject: [PATCH 16/29] Fix config in bigquery location test --- tests/load/bigquery/test_bigquery_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load/bigquery/test_bigquery_client.py b/tests/load/bigquery/test_bigquery_client.py index 9985147748..abbaf8d414 100644 --- a/tests/load/bigquery/test_bigquery_client.py +++ b/tests/load/bigquery/test_bigquery_client.py @@ -242,7 +242,7 @@ def test_bigquery_job_errors(client: BigQueryClient, file_storage: FileStorage) @pytest.mark.parametrize('location', ["US", "EU"]) def test_bigquery_location(location: str, file_storage: FileStorage) -> None: - with cm_yield_client_with_storage("bigquery", default_config_values={"location": location}) as client: + with cm_yield_client_with_storage("bigquery", default_config_values={"credentials": {"location": location}}) as client: user_table_name = prepare_table(client) load_json = { "_dlt_id": uniq_id(), From f657071296eb5e925afe6a7f1a8931b9d30bf369 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 18:44:43 -0500 Subject: [PATCH 17/29] Only keep non-default config args in factory --- dlt/common/destination/reference.py | 10 ++--- dlt/destinations/impl/filesystem/factory.py | 2 +- dlt/load/load.py | 9 ++--- tests/pipeline/test_pipeline.py | 44 +++++++++++++++++++++ 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index ef770512ef..9434cbb136 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -356,7 +356,6 @@ class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]): with credentials and other config params. 
""" config_params: Optional[Dict[str, Any]] = None - initial_config: DestinationClientConfiguration def __init__(self, **kwargs: Any) -> None: # Create initial unresolved destination config @@ -364,11 +363,10 @@ def __init__(self, **kwargs: Any) -> None: # to supersede config from the environment or pipeline args sig = inspect.signature(self.__class__) params = sig.parameters - config_args = { + self.config_params = { k: v for k, v in kwargs.items() if k not in params or v != params[k].default } - self.initial_config = self.spec(**config_args) @property @abstractmethod @@ -398,7 +396,7 @@ def configuration(self, initial_config: TDestinationConfig) -> TDestinationConfi initial_config, sections=(known_sections.DESTINATION, self.name), # Already populated values will supersede resolved env config - explicit_value=dict(initial_config) + explicit_value=self.config_params ) @staticmethod @@ -439,11 +437,13 @@ def from_reference(ref: TDestinationReferenceArg, credentials: Optional[Credenti return factory(**kwargs) def client(self, schema: Schema, initial_config: TDestinationConfig = config.value) -> TDestinationClient: + return self.client_class(schema, self.configuration(initial_config)) # Create merged config with the pipeline initial cfg and the partial config of this instance + cfg = self.spec( **dict( initial_config, - **{k: v for k, v in self.initial_config.items() if v is not None} + **{k: v for k, v in self.config_params.items() if v is not None} ) ) return self.client_class(schema, self.configuration(cfg)) diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index 95e74b85a9..d1251e51ec 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -25,7 +25,7 @@ def client_class(self) -> t.Type["FilesystemClient"]: def __init__( self, bucket_url: str = None, - credentials: FileSystemCredentials = None, + credentials: t.Union[FileSystemCredentials, t.Dict[str, t.Any]] = None, **kwargs: t.Any, ) -> None: super().__init__(bucket_url=bucket_url, credentials=credentials, **kwargs) diff --git a/dlt/load/load.py b/dlt/load/load.py index 9015089857..2f8a6ae04a 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -47,23 +47,22 @@ def __init__( self.initial_client_config = initial_client_config self.initial_staging_client_config = initial_staging_client_config self.destination = destination - self.capabilities = destination.capabilities + self.capabilities = destination.capabilities() self.staging_destination = staging_destination self.pool = NullExecutor() self.load_storage: LoadStorage = self.create_storage(is_storage_owner) self._processed_load_ids: Dict[str, str] = {} """Load ids to dataset name""" - def create_storage(self, is_storage_owner: bool) -> LoadStorage: - supported_file_formats = self.capabilities().supported_loader_file_formats + supported_file_formats = self.capabilities.supported_loader_file_formats if self.staging_destination: supported_file_formats = self.staging_destination.capabilities().supported_loader_file_formats + ["reference"] if isinstance(self.get_destination_client(Schema("test")), WithStagingDataset): supported_file_formats += ["sql"] load_storage = LoadStorage( is_storage_owner, - self.capabilities().preferred_loader_file_format, + self.capabilities.preferred_loader_file_format, supported_file_formats, config=self.config._load_storage_config ) @@ -99,7 +98,7 @@ def w_spool_job(self: "Load", file_path: str, load_id: str, schema: Schema) -> O with 
(self.get_staging_destination_client(schema) if is_staging_destination_job else job_client) as client:
             job_info = self.load_storage.parse_job_file_name(file_path)
             if job_info.file_format not in self.load_storage.supported_file_formats:
-                raise LoadClientUnsupportedFileFormats(job_info.file_format, self.capabilities().supported_loader_file_formats, file_path)
+                raise LoadClientUnsupportedFileFormats(job_info.file_format, self.capabilities.supported_loader_file_formats, file_path)
             logger.info(f"Will load file {file_path} with table name {job_info.table_name}")
             table = client.get_load_table(job_info.table_name)
             if table["write_disposition"] not in ["append", "replace", "merge"]:
diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
index 78f5ffade6..d9625d4a33 100644
--- a/tests/pipeline/test_pipeline.py
+++ b/tests/pipeline/test_pipeline.py
@@ -21,7 +21,9 @@ from dlt.common.runtime.collector import AliveCollector, EnlightenCollector, LogCollector, TqdmCollector
 from dlt.common.schema.utils import new_column, new_table
 from dlt.common.utils import uniq_id
+from dlt.common.schema import Schema
 
+from dlt.destinations import filesystem, redshift, dummy
 from dlt.extract.exceptions import InvalidResourceDataTypeBasic, PipeGenInvalid, SourceExhausted
 from dlt.extract.extract import ExtractorStorage
 from dlt.extract.source import DltResource, DltSource
@@ -228,6 +230,48 @@ def test_destination_explicit_credentials(environment: Any) -> None:
     assert config.credentials.is_resolved()
 
 
+def test_destination_staging_config(environment: Any) -> None:
+    p = dlt.pipeline(
+        pipeline_name="staging_pipeline",
+        destination=redshift(credentials="redshift://loader:loader@localhost:5432/dlt_data"),
+        staging=filesystem("s3://testing-bucket", credentials={"aws_access_key_id": "key_id", "aws_secret_access_key": "key"})
+    )
+    schema = Schema("foo")
+    p._inject_schema(schema)
+    client, staging = p._get_destination_clients(p.default_schema)
+
+    # Ensure that as_staging flag is set in the final resolved config
+    assert staging.config.as_staging is True  # type: ignore[attr-defined]
+
+
+def test_destination_factory_defaults_resolve_from_config(environment: Any) -> None:
+    """Params passed explicitly to destination supersede config values.
+    Env config values supersede default values.
+    """
+    environment["FAIL_PROB"] = "0.3"
+    environment["RETRY_PROB"] = "0.8"
+    p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=dummy(retry_prob=0.5))
+
+    client = p.destination_client()
+
+    assert client.config.fail_prob == 0.3  # type: ignore[attr-defined]
+    assert client.config.retry_prob == 0.5  # type: ignore[attr-defined]
+
+
+def test_destination_credentials_in_factory(environment: Any) -> None:
+    os.environ['DESTINATION__REDSHIFT__CREDENTIALS'] = "redshift://abc:123@localhost:5432/some_db"
+
+    p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift(credentials="redshift://abc:123@localhost:5432/other_db"))
+
+    # Explicit factory arg supersedes config
+    assert p.destination_client().config.credentials.database == "other_db"  # type: ignore[attr-defined]
+
+    p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift())
+
+    # Config value is used if no explicit arg is passed
+    assert p.destination_client().config.credentials.database == "some_db"  # type: ignore[attr-defined]
+
+
 @pytest.mark.skip(reason="does not work on CI.
probably takes right credentials from somewhere....") def test_destination_explicit_invalid_credentials_filesystem(environment: Any) -> None: # if string cannot be parsed From 56e922a02ec3662b213461e63a52c2fb91579f99 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 20:45:43 -0500 Subject: [PATCH 18/29] Resolve duckdb credentials in pipeline context --- dlt/destinations/impl/duckdb/configuration.py | 32 ++++++++++++------- dlt/pipeline/__init__.py | 2 +- tests/load/duckdb/test_duckdb_client.py | 20 ++++++------ tests/pipeline/test_pipeline.py | 4 +-- 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/dlt/destinations/impl/duckdb/configuration.py b/dlt/destinations/impl/duckdb/configuration.py index 556a7c9829..a5f77be8fd 100644 --- a/dlt/destinations/impl/duckdb/configuration.py +++ b/dlt/destinations/impl/duckdb/configuration.py @@ -96,10 +96,21 @@ class DuckDbCredentials(DuckDbBaseCredentials): __config_gen_annotations__: ClassVar[List[str]] = [] - def _database_path(self) -> str: + def is_partial(self) -> bool: + partial = super().is_partial() + if partial: + return True + # Wait until pipeline context is set up before resolving + return self.database == ":pipeline:" + + def on_resolved(self) -> None: + # do not set any paths for external database + if self.database == ":external:": + return + # try the pipeline context is_default_path = False if self.database == ":pipeline:": - return self._path_in_pipeline(DEFAULT_DUCK_DB_NAME) + self.database = self._path_in_pipeline(DEFAULT_DUCK_DB_NAME) else: # maybe get database maybe_database, maybe_is_default_path = self._path_from_pipeline(DEFAULT_DUCK_DB_NAME) @@ -107,14 +118,13 @@ def _database_path(self) -> str: if not self.database or not maybe_is_default_path: # create database locally is_default_path = maybe_is_default_path - path = maybe_database - else: - path = self.database + self.database = maybe_database - path = os.path.abspath(path) + # always make database an abs path + self.database = os.path.abspath(self.database) + # do not save the default path into pipeline's local state if not is_default_path: - self._path_to_pipeline(path) - return path + self._path_to_pipeline(self.database) def _path_in_pipeline(self, rel_path: str) -> str: from dlt.common.configuration.container import Container @@ -123,9 +133,7 @@ def _path_in_pipeline(self, rel_path: str) -> str: context = Container()[PipelineContext] if context.is_active(): # pipeline is active, get the working directory - abs_path = os.path.abspath(os.path.join(context.pipeline().working_dir, rel_path)) - context.pipeline().set_local_state_val(LOCAL_STATE_KEY, abs_path) - return abs_path + return os.path.join(context.pipeline().working_dir, rel_path) raise RuntimeError("Attempting to use special duckdb database :pipeline: outside of pipeline context.") def _path_to_pipeline(self, abspath: str) -> None: @@ -173,7 +181,7 @@ def _path_from_pipeline(self, default_path: str) -> Tuple[str, bool]: return default_path, True def _conn_str(self) -> str: - return self._database_path() + return self.database @configspec diff --git a/dlt/pipeline/__init__.py b/dlt/pipeline/__init__.py index fdb39d25b1..3faad8f5a0 100644 --- a/dlt/pipeline/__init__.py +++ b/dlt/pipeline/__init__.py @@ -116,7 +116,7 @@ def pipeline( if not pipelines_dir: pipelines_dir = get_dlt_pipelines_dir() - destination = Destination.from_reference(destination or kwargs["destination_name"], credentials=credentials) + destination = Destination.from_reference(destination or 
kwargs["destination_name"]) staging = Destination.from_reference(staging or kwargs.get("staging_name", None)) if staging is not None else None progress = collector_from_name(progress) diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index ace46ebd5e..f5d2f92a4c 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -47,13 +47,13 @@ def test_duckdb_open_conn_default() -> None: def test_duckdb_database_path() -> None: # resolve without any path provided c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) - assert c.credentials._database_path().lower() == os.path.abspath("quack.duckdb").lower() + assert c.credentials._conn_str().lower() == os.path.abspath("quack.duckdb").lower() # resolve without any path but with pipeline context p = dlt.pipeline(pipeline_name="quack_pipeline") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset")) # still cwd db_path = os.path.abspath(os.path.join(".", "quack_pipeline.duckdb")) - assert c.credentials._database_path().lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() # we do not keep default duckdb path in the local state with pytest.raises(KeyError): p.get_local_state_val("duckdb_database") @@ -70,7 +70,7 @@ def test_duckdb_database_path() -> None: # test special :pipeline: path to create in pipeline folder c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=":pipeline:")) db_path = os.path.abspath(os.path.join(p.working_dir, DEFAULT_DUCK_DB_NAME)) - assert c.credentials._database_path().lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() # connect conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) @@ -81,7 +81,7 @@ def test_duckdb_database_path() -> None: # provide relative path db_path = "_storage/test_quack.duckdb" c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials="duckdb:///_storage/test_quack.duckdb")) - assert c.credentials._database_path().lower() == os.path.abspath(db_path).lower() + assert c.credentials._conn_str().lower() == os.path.abspath(db_path).lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -91,7 +91,7 @@ def test_duckdb_database_path() -> None: db_path = os.path.abspath("_storage/abs_test_quack.duckdb") c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=f"duckdb:///{db_path}")) assert os.path.isabs(c.credentials.database) - assert c.credentials._database_path().lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -100,7 +100,7 @@ def test_duckdb_database_path() -> None: # set just path as credentials db_path = "_storage/path_test_quack.duckdb" c = resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path)) - assert c.credentials._database_path().lower() == os.path.abspath(db_path).lower() + assert c.credentials._conn_str().lower() == os.path.abspath(db_path).lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -109,7 +109,7 @@ def test_duckdb_database_path() -> None: db_path = os.path.abspath("_storage/abs_path_test_quack.duckdb") c 
= resolve_configuration(DuckDbClientConfiguration(dataset_name="test_dataset", credentials=db_path)) assert os.path.isabs(c.credentials.database) - assert c.credentials._database_path().lower() == db_path.lower() + assert c.credentials._conn_str().lower() == db_path.lower() conn = c.credentials.borrow_conn(read_only=False) c.credentials.return_conn(conn) assert os.path.isfile(db_path) @@ -129,7 +129,7 @@ def test_keeps_initial_db_path() -> None: print(p.pipelines_dir) with p.sql_client() as conn: # still cwd - assert conn.credentials._database_path().lower() == os.path.abspath(db_path).lower() + assert conn.credentials._conn_str().lower() == os.path.abspath(db_path).lower() # but it is kept in the local state assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() @@ -139,7 +139,7 @@ def test_keeps_initial_db_path() -> None: with p.sql_client() as conn: # still cwd assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() - assert conn.credentials._database_path().lower() == os.path.abspath(db_path).lower() + assert conn.credentials._conn_str().lower() == os.path.abspath(db_path).lower() # now create a new pipeline dlt.pipeline(pipeline_name="not_quack", destination="dummy") @@ -148,7 +148,7 @@ def test_keeps_initial_db_path() -> None: assert p.get_local_state_val("duckdb_database").lower() == os.path.abspath(db_path).lower() # new pipeline context took over # TODO: restore pipeline context on each call - assert conn.credentials._database_path().lower() != os.path.abspath(db_path).lower() + assert conn.credentials._conn_str().lower() != os.path.abspath(db_path).lower() def test_duckdb_database_delete() -> None: diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index d9625d4a33..92975af96d 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -16,7 +16,7 @@ from dlt.common.configuration.specs.gcp_credentials import GcpOAuthCredentials from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.capabilities import TLoaderFileFormat -from dlt.common.exceptions import DestinationHasFailedJobs, DestinationTerminalException, PipelineStateNotAvailable, UnknownDestinationModule +from dlt.common.exceptions import DestinationHasFailedJobs, DestinationTerminalException, PipelineStateNotAvailable, InvalidDestinationReference from dlt.common.pipeline import PipelineContext from dlt.common.runtime.collector import AliveCollector, EnlightenCollector, LogCollector, TqdmCollector from dlt.common.schema.utils import new_column, new_table @@ -165,7 +165,7 @@ def test_pipeline_context() -> None: def test_import_unknown_destination() -> None: - with pytest.raises(UnknownDestinationModule): + with pytest.raises(InvalidDestinationReference): dlt.pipeline(destination="!") From 08a8fdc13b542a20e7ddfb9d32932e4069a71bb0 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 14 Nov 2023 20:47:05 -0500 Subject: [PATCH 19/29] Cleanup --- dlt/common/destination/reference.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 9434cbb136..8642250295 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -438,15 +438,6 @@ def from_reference(ref: TDestinationReferenceArg, credentials: Optional[Credenti def client(self, schema: Schema, initial_config: TDestinationConfig = config.value) -> TDestinationClient: return 
self.client_class(schema, self.configuration(initial_config)) - # Create merged config with the pipeline initial cfg and the partial config of this instance - - cfg = self.spec( - **dict( - initial_config, - **{k: v for k, v in self.config_params.items() if v is not None} - ) - ) - return self.client_class(schema, self.configuration(cfg)) TDestination = Destination[DestinationClientConfiguration, JobClientBase] From 2567ca0fd431ba820d5a508b49106a57b16ec794 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 15 Nov 2023 17:36:38 -0500 Subject: [PATCH 20/29] Union credentials arguments --- dlt/destinations/impl/athena/factory.py | 2 +- dlt/destinations/impl/bigquery/factory.py | 34 --------------------- dlt/destinations/impl/duckdb/factory.py | 2 +- dlt/destinations/impl/filesystem/factory.py | 2 +- dlt/destinations/impl/motherduck/factory.py | 2 +- dlt/destinations/impl/mssql/factory.py | 2 +- dlt/destinations/impl/postgres/factory.py | 2 +- dlt/destinations/impl/qdrant/factory.py | 2 +- dlt/destinations/impl/redshift/factory.py | 2 +- dlt/destinations/impl/snowflake/factory.py | 2 +- dlt/destinations/impl/weaviate/factory.py | 2 +- 11 files changed, 10 insertions(+), 44 deletions(-) diff --git a/dlt/destinations/impl/athena/factory.py b/dlt/destinations/impl/athena/factory.py index e66f8041d1..53fe9e4e1c 100644 --- a/dlt/destinations/impl/athena/factory.py +++ b/dlt/destinations/impl/athena/factory.py @@ -25,7 +25,7 @@ def client_class(self) -> t.Type["AthenaClient"]: def __init__( self, query_result_bucket: t.Optional[str] = None, - credentials: t.Optional[AwsCredentials] = None, + credentials: t.Union[AwsCredentials, t.Dict[str, t.Any], t.Any] = None, athena_work_group: t.Optional[str] = None, aws_data_catalog: t.Optional[str] = "awsdatacatalog", supports_truncate_command: bool = False, diff --git a/dlt/destinations/impl/bigquery/factory.py b/dlt/destinations/impl/bigquery/factory.py index 7b4ade37e4..ce6ace3bf7 100644 --- a/dlt/destinations/impl/bigquery/factory.py +++ b/dlt/destinations/impl/bigquery/factory.py @@ -1,37 +1,3 @@ -# import typing as t - -# from dlt.destinations.impl.snowflake.configuration import SnowflakeCredentials, SnowflakeClientConfiguration -# from dlt.destinations.impl.snowflake import capabilities -# from dlt.common.destination import Destination, DestinationCapabilitiesContext -# from dlt.common.destination import DestinationCapabilitiesContext - -# if t.TYPE_CHECKING: -# from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient - - -# class snowflake(Destination): - -# spec = SnowflakeClientConfiguration - -# def capabilities(self) -> DestinationCapabilitiesContext: -# return capabilities() - -# @property -# def client_class(self) -> t.Type["SnowflakeClient"]: -# from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient - -# return SnowflakeClient - -# def __init__( -# self, -# credentials: SnowflakeCredentials = None, -# stage_name: t.Optional[str] = None, -# keep_staged_files: bool = True, -# **kwargs: t.Any, -# ) -> None: -# super().__init__(credentials=credentials, stage_name=stage_name, keep_staged_files=keep_staged_files, **kwargs) - - import typing as t from dlt.destinations.impl.bigquery.configuration import BigQueryClientConfiguration diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py index c542239740..d7c883e2b5 100644 --- a/dlt/destinations/impl/duckdb/factory.py +++ b/dlt/destinations/impl/duckdb/factory.py @@ -25,7 +25,7 @@ def client_class(self) -> t.Type["DuckDbClient"]: 
# @with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, 'duckdb'), accept_partial=True) def __init__( self, - credentials: t.Union[DuckDbCredentials, str, "DuckDBPyConnection"] = None, + credentials: t.Union[DuckDbCredentials, t.Dict[str, t.Any], str, "DuckDBPyConnection"] = None, create_indexes: bool = False, **kwargs: t.Any, ) -> None: diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py index d1251e51ec..c6471a67e6 100644 --- a/dlt/destinations/impl/filesystem/factory.py +++ b/dlt/destinations/impl/filesystem/factory.py @@ -25,7 +25,7 @@ def client_class(self) -> t.Type["FilesystemClient"]: def __init__( self, bucket_url: str = None, - credentials: t.Union[FileSystemCredentials, t.Dict[str, t.Any]] = None, + credentials: t.Union[FileSystemCredentials, t.Dict[str, t.Any], t.Any] = None, **kwargs: t.Any, ) -> None: super().__init__(bucket_url=bucket_url, credentials=credentials, **kwargs) diff --git a/dlt/destinations/impl/motherduck/factory.py b/dlt/destinations/impl/motherduck/factory.py index 7c7301043d..19f85e8f67 100644 --- a/dlt/destinations/impl/motherduck/factory.py +++ b/dlt/destinations/impl/motherduck/factory.py @@ -24,7 +24,7 @@ def client_class(self) -> t.Type["MotherDuckClient"]: def __init__( self, - credentials: t.Union[MotherDuckCredentials, str, "DuckDBPyConnection"] = None, + credentials: t.Union[MotherDuckCredentials, str, t.Dict[str, t.Any], "DuckDBPyConnection"] = None, create_indexes: bool = False, **kwargs: t.Any, ) -> None: diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py index 542be00e63..fd44568a9a 100644 --- a/dlt/destinations/impl/mssql/factory.py +++ b/dlt/destinations/impl/mssql/factory.py @@ -24,7 +24,7 @@ def client_class(self) -> t.Type["MsSqlClient"]: def __init__( self, - credentials: t.Union[MsSqlCredentials, str] = None, + credentials: t.Union[MsSqlCredentials, t.Dict[str, t.Any], str] = None, create_indexes: bool = True, **kwargs: t.Any, ) -> None: diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index 208e13efd6..139751b2c1 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -24,7 +24,7 @@ def client_class(self) -> t.Type["PostgresClient"]: def __init__( self, - credentials: t.Union[PostgresCredentials, str] = None, + credentials: t.Union[PostgresCredentials, t.Dict[str, t.Any], str] = None, create_indexes: bool = True, **kwargs: t.Any, ) -> None: diff --git a/dlt/destinations/impl/qdrant/factory.py b/dlt/destinations/impl/qdrant/factory.py index 5cb436832c..316b5ae434 100644 --- a/dlt/destinations/impl/qdrant/factory.py +++ b/dlt/destinations/impl/qdrant/factory.py @@ -24,7 +24,7 @@ def client_class(self) -> t.Type["QdrantClient"]: def __init__( self, - credentials: t.Optional[QdrantCredentials] = None, + credentials: t.Union[QdrantCredentials, t.Dict[str, t.Any]] = None, **kwargs: t.Any, ) -> None: super().__init__(credentials=credentials, **kwargs) diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py index de8964a2c0..8f6a98aab7 100644 --- a/dlt/destinations/impl/redshift/factory.py +++ b/dlt/destinations/impl/redshift/factory.py @@ -57,7 +57,7 @@ def client_class(self) -> t.Type["RedshiftClient"]: def __init__( self, - credentials: t.Union[RedshiftCredentials, str] = None, + credentials: t.Union[RedshiftCredentials, t.Dict[str, t.Any], str] = None, create_indexes: bool = True, 
staging_iam_role: t.Optional[str] = None,
         **kwargs: t.Any,
diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py
index 9800461a5e..694ea7376c 100644
--- a/dlt/destinations/impl/snowflake/factory.py
+++ b/dlt/destinations/impl/snowflake/factory.py
@@ -23,7 +23,7 @@ def client_class(self) -> t.Type["SnowflakeClient"]:
 
     def __init__(
         self,
-        credentials: SnowflakeCredentials = None,
+        credentials: t.Union[SnowflakeCredentials, t.Dict[str, t.Any], str] = None,
         stage_name: t.Optional[str] = None,
         keep_staged_files: bool = True,
         **kwargs: t.Any,
diff --git a/dlt/destinations/impl/weaviate/factory.py b/dlt/destinations/impl/weaviate/factory.py
index c89ba22f5d..4bac1f1a60 100644
--- a/dlt/destinations/impl/weaviate/factory.py
+++ b/dlt/destinations/impl/weaviate/factory.py
@@ -24,7 +24,7 @@ def client_class(self) -> t.Type["WeaviateClient"]:
 
     def __init__(
         self,
-        credentials: t.Optional[WeaviateCredentials] = None,
+        credentials: t.Union[WeaviateCredentials, t.Dict[str, t.Any]] = None,
         **kwargs: t.Any,
     ) -> None:
         super().__init__(credentials=credentials, **kwargs)
From 3537c03757112f76806b0c6469032070ff6d14d8 Mon Sep 17 00:00:00 2001
From: Steinthor Palsson
Date: Wed, 15 Nov 2023 18:42:33 -0500
Subject: [PATCH 21/29] Common tests without dest dependencies

---
 tests/pipeline/test_pipeline.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
index 92975af96d..94320ef087 100644
--- a/tests/pipeline/test_pipeline.py
+++ b/tests/pipeline/test_pipeline.py
@@ -231,17 +231,19 @@ def test_destination_explicit_credentials(environment: Any) -> None:
 
 
 def test_destination_staging_config(environment: Any) -> None:
+    fs_dest = filesystem("file:///testing-bucket")
     p = dlt.pipeline(
         pipeline_name="staging_pipeline",
         destination=redshift(credentials="redshift://loader:loader@localhost:5432/dlt_data"),
-        staging=filesystem("s3://testing-bucket", credentials={"aws_access_key_id": "key_id", "aws_secret_access_key": "key"})
+        staging=fs_dest
     )
     schema = Schema("foo")
     p._inject_schema(schema)
-    client, staging = p._get_destination_clients(p.default_schema)
+    initial_config = p._get_destination_client_initial_config(p.staging, as_staging=True)
+    staging_config = fs_dest.configuration(initial_config)  # type: ignore[arg-type]
 
     # Ensure that as_staging flag is set in the final resolved config
-    assert staging.config.as_staging is True  # type: ignore[attr-defined]
+    assert staging_config.as_staging is True
 
 
 def test_destination_factory_defaults_resolve_from_config(environment: Any) -> None:
@@ -261,15 +263,21 @@ def test_destination_factory_defaults_resolve_from_config(environment: Any) -> N
 
 def test_destination_credentials_in_factory(environment: Any) -> None:
     os.environ['DESTINATION__REDSHIFT__CREDENTIALS'] = "redshift://abc:123@localhost:5432/some_db"
 
-    p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift(credentials="redshift://abc:123@localhost:5432/other_db"))
+    redshift_dest = redshift("redshift://abc:123@localhost:5432/other_db")
+    p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift_dest)
+
+    initial_config = p._get_destination_client_initial_config(p.destination)
+    dest_config = redshift_dest.configuration(initial_config)  # type: ignore[arg-type]
 
     # Explicit factory arg supersedes config
-    assert p.destination_client().config.credentials.database == "other_db"  # type: ignore[attr-defined]
+    assert dest_config.credentials.database ==
"other_db" - p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift()) + redshift_dest = redshift() + p = dlt.pipeline(pipeline_name="dummy_pipeline", destination=redshift_dest) - # Config value is used if no explicit arg is passed - assert p.destination_client().config.credentials.database == "some_db" # type: ignore[attr-defined] + initial_config = p._get_destination_client_initial_config(p.destination) + dest_config = redshift_dest.configuration(initial_config) # type: ignore[arg-type] + assert dest_config.credentials.database == "some_db" @pytest.mark.skip(reason="does not work on CI. probably takes right credentials from somewhere....") From 25a937afca86ee9bf948e11542f05628980f7c3f Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 15 Nov 2023 19:17:00 -0500 Subject: [PATCH 22/29] Forward all athena arguments --- dlt/destinations/impl/athena/factory.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dlt/destinations/impl/athena/factory.py b/dlt/destinations/impl/athena/factory.py index 53fe9e4e1c..389d4fe6d1 100644 --- a/dlt/destinations/impl/athena/factory.py +++ b/dlt/destinations/impl/athena/factory.py @@ -32,4 +32,12 @@ def __init__( force_iceberg: bool = False, **kwargs: t.Any, ) -> None: - super().__init__(**kwargs) + super().__init__( + query_result_bucket=query_result_bucket, + credentials=credentials, + athena_work_group=athena_work_group, + aws_data_catalog=aws_data_catalog, + supports_truncate_command=supports_truncate_command, + force_iceberg=force_iceberg, + **kwargs, + ) From 97f1afc2fc4a6a0ba92309774a235f0c3595b150 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 15 Nov 2023 19:21:53 -0500 Subject: [PATCH 23/29] Delete commented code --- dlt/destinations/impl/duckdb/factory.py | 1 - dlt/destinations/impl/redshift/factory.py | 33 ----------------------- 2 files changed, 34 deletions(-) diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py index d7c883e2b5..34d76751ac 100644 --- a/dlt/destinations/impl/duckdb/factory.py +++ b/dlt/destinations/impl/duckdb/factory.py @@ -22,7 +22,6 @@ def client_class(self) -> t.Type["DuckDbClient"]: return DuckDbClient - # @with_config(spec=DuckDbClientConfiguration, sections=(known_sections.DESTINATION, 'duckdb'), accept_partial=True) def __init__( self, credentials: t.Union[DuckDbCredentials, t.Dict[str, t.Any], str, "DuckDBPyConnection"] = None, diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py index 8f6a98aab7..885930a6e9 100644 --- a/dlt/destinations/impl/redshift/factory.py +++ b/dlt/destinations/impl/redshift/factory.py @@ -1,36 +1,3 @@ -# import typing as t - -# from dlt.common.destination import Destination, DestinationCapabilitiesContext - -# from dlt.destinations.impl.postgres.configuration import PostgresCredentials, PostgresClientConfiguration -# from dlt.destinations.impl.postgres import capabilities - -# if t.TYPE_CHECKING: -# from dlt.destinations.impl.postgres.postgres import PostgresClient - - -# class postgres(Destination): - -# spec = PostgresClientConfiguration - -# def capabilities(self) -> DestinationCapabilitiesContext: -# return capabilities() - -# @property -# def client_class(self) -> t.Type["PostgresClient"]: -# from dlt.destinations.impl.postgres.postgres import PostgresClient - -# return PostgresClient - -# def __init__( -# self, -# credentials: t.Union[PostgresCredentials, str] = None, -# create_indexes: bool = True, -# **kwargs: t.Any, -# ) -> None: -# 
super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs) - - import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext From fe24e1461a2102c5ae4229bd40da56531a1d8131 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 15 Nov 2023 19:25:56 -0500 Subject: [PATCH 24/29] Reference docstrings --- dlt/common/destination/reference.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 8642250295..6a1dd26be2 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -371,12 +371,12 @@ def __init__(self, **kwargs: Any) -> None: @property @abstractmethod def spec(self) -> Type[TDestinationConfig]: - """Returns the destination configuration spec""" + """A spec of destination configuration that also contains destination credentials""" ... @abstractmethod def capabilities(self) -> DestinationCapabilitiesContext: - """Returns the destination capabilities""" + """Destination capabilities ie. supported loader file formats, identifier name lengths, naming conventions, escape function etc.""" ... @property @@ -386,7 +386,7 @@ def name(self) -> str: @property @abstractmethod def client_class(self) -> Type[TDestinationClient]: - """Returns the client class""" + """A job client class responsible for starting and resuming load jobs""" ... def configuration(self, initial_config: TDestinationConfig) -> TDestinationConfig: @@ -437,6 +437,7 @@ def from_reference(ref: TDestinationReferenceArg, credentials: Optional[Credenti return factory(**kwargs) def client(self, schema: Schema, initial_config: TDestinationConfig = config.value) -> TDestinationClient: + """Returns a configured instance of the destination's job client""" return self.client_class(schema, self.configuration(initial_config)) From 481a7cb5e147c9cca5d5ffc6d36b65520ab0e516 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 15 Nov 2023 19:58:05 -0500 Subject: [PATCH 25/29] Add deprecation warning for credentials argument --- dlt/__init__.py | 2 ++ dlt/pipeline/__init__.py | 3 +++ dlt/pipeline/deprecations.py | 20 ++++++++++++++++++++ dlt/pipeline/pipeline.py | 6 ++++++ 4 files changed, 31 insertions(+) create mode 100644 dlt/pipeline/deprecations.py diff --git a/dlt/__init__.py b/dlt/__init__.py index f5dde3f204..728343bdd6 100644 --- a/dlt/__init__.py +++ b/dlt/__init__.py @@ -31,6 +31,7 @@ from dlt.extract.decorators import source, resource, transformer, defer from dlt.pipeline import pipeline as _pipeline, run, attach, Pipeline, dbt, current as _current, mark as _mark from dlt.pipeline import progress +from dlt import destinations pipeline = _pipeline current = _current @@ -64,4 +65,5 @@ "TSecretValue", "TCredentials", "sources", + "destinations", ] diff --git a/dlt/pipeline/__init__.py b/dlt/pipeline/__init__.py index 3faad8f5a0..af7dd12294 100644 --- a/dlt/pipeline/__init__.py +++ b/dlt/pipeline/__init__.py @@ -13,6 +13,7 @@ from dlt.pipeline.configuration import PipelineConfiguration, ensure_correct_pipeline_kwargs from dlt.pipeline.pipeline import Pipeline from dlt.pipeline.progress import _from_name as collector_from_name, TCollectorArg, _NULL_COLLECTOR +from dlt.pipeline.deprecations import credentials_argument_deprecated @overload @@ -104,6 +105,8 @@ def pipeline( # is any of the arguments different from defaults has_arguments = bool(orig_args[0]) or any(orig_args[1].values()) + credentials_argument_deprecated("pipeline", 
credentials, destination) + if not has_arguments: context = Container()[PipelineContext] # if pipeline instance is already active then return it, otherwise create a new one diff --git a/dlt/pipeline/deprecations.py b/dlt/pipeline/deprecations.py new file mode 100644 index 0000000000..138167c8d3 --- /dev/null +++ b/dlt/pipeline/deprecations.py @@ -0,0 +1,20 @@ +import typing as t +import warnings + +from dlt.common.destination import Destination, TDestinationReferenceArg + + +def credentials_argument_deprecated( + caller_name: str, credentials: t.Optional[t.Any], destination: TDestinationReferenceArg = None +) -> None: + if credentials is None: + return + + dest_name = Destination.to_name(destination) if destination else "postgres" + + warnings.warn( + f"The `credentials argument` to {caller_name} is deprecated and will be removed in a future version. " + f"Pass the same credentials to the `destination` instance instead, e.g. {caller_name}(destination=dlt.destinations.{dest_name}(credentials=...))", + DeprecationWarning, + stacklevel=2, + ) diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index cf90cf1ff2..1061953569 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -52,6 +52,7 @@ from dlt.pipeline.state_sync import STATE_ENGINE_VERSION, load_state_from_destination, merge_state_if_changed, migrate_state, state_resource, json_encode_state, json_decode_state from dlt.common.schema.utils import normalize_schema_name +from dlt.pipeline.deprecations import credentials_argument_deprecated def with_state_sync(may_extract_state: bool = False) -> Callable[[TFun], TFun]: @@ -342,6 +343,9 @@ def load( # set destination and default dataset if provided self._set_destinations(destination, None) self._set_dataset_name(dataset_name) + + credentials_argument_deprecated("pipeline.load", credentials, destination) + self.credentials = credentials or self.credentials # check if any schema is present, if not then no data was extracted @@ -449,6 +453,8 @@ def run( self._set_destinations(destination, staging) self._set_dataset_name(dataset_name) + credentials_argument_deprecated("pipeline.run", credentials, self.destination) + # sync state with destination if self.config.restore_from_destination and not self.full_refresh and not self._state_restored and (self.destination or destination): self.sync_destination(destination, staging, dataset_name) From d91402cf75c080887e3111c882d98d349d2e2e4e Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 15 Nov 2023 20:58:56 -0500 Subject: [PATCH 26/29] Init docstrings for destination factories --- dlt/destinations/impl/athena/factory.py | 14 ++++++++++++-- dlt/destinations/impl/duckdb/factory.py | 10 ++++++++++ dlt/destinations/impl/filesystem/factory.py | 19 +++++++++++++++++++ dlt/destinations/impl/motherduck/factory.py | 10 ++++++++++ dlt/destinations/impl/mssql/factory.py | 10 ++++++++++ dlt/destinations/impl/postgres/factory.py | 10 ++++++++++ dlt/destinations/impl/redshift/factory.py | 11 +++++++++++ dlt/destinations/impl/snowflake/factory.py | 10 ++++++++++ dlt/destinations/impl/weaviate/factory.py | 19 ++++++++++++++++++- 9 files changed, 110 insertions(+), 3 deletions(-) diff --git a/dlt/destinations/impl/athena/factory.py b/dlt/destinations/impl/athena/factory.py index 389d4fe6d1..cc2b027695 100644 --- a/dlt/destinations/impl/athena/factory.py +++ b/dlt/destinations/impl/athena/factory.py @@ -28,16 +28,26 @@ def __init__( credentials: t.Union[AwsCredentials, t.Dict[str, t.Any], t.Any] = None, athena_work_group: 
t.Optional[str] = None,
         aws_data_catalog: t.Optional[str] = "awsdatacatalog",
-        supports_truncate_command: bool = False,
         force_iceberg: bool = False,
         **kwargs: t.Any,
     ) -> None:
+        """Configure the Athena destination to use in a pipeline.
+
+        All arguments provided here supersede other configuration sources such as environment variables and dlt config files.
+
+        Args:
+            query_result_bucket: S3 bucket to store query results in
+            credentials: AWS credentials to connect to the Athena database.
+            athena_work_group: Athena work group to use
+            aws_data_catalog: Athena data catalog to use
+            force_iceberg: Force iceberg tables
+            **kwargs: Additional arguments passed to the destination config
+        """
         super().__init__(
             query_result_bucket=query_result_bucket,
             credentials=credentials,
             athena_work_group=athena_work_group,
             aws_data_catalog=aws_data_catalog,
-            supports_truncate_command=supports_truncate_command,
             force_iceberg=force_iceberg,
             **kwargs,
         )
diff --git a/dlt/destinations/impl/duckdb/factory.py b/dlt/destinations/impl/duckdb/factory.py
index 34d76751ac..1b882c52a1 100644
--- a/dlt/destinations/impl/duckdb/factory.py
+++ b/dlt/destinations/impl/duckdb/factory.py
@@ -28,4 +28,14 @@ def __init__(
         create_indexes: bool = False,
         **kwargs: t.Any,
     ) -> None:
+        """Configure the DuckDB destination to use in a pipeline.
+
+        All arguments provided here supersede other configuration sources such as environment variables and dlt config files.
+
+        Args:
+            credentials: Credentials to connect to the duckdb database. Can be an instance of `DuckDbCredentials` or
+                a path to a database file. Use `:memory:` to create an in-memory database.
+            create_indexes: Should unique indexes be created
+            **kwargs: Additional arguments passed to the destination config
+        """
         super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs)
diff --git a/dlt/destinations/impl/filesystem/factory.py b/dlt/destinations/impl/filesystem/factory.py
index c6471a67e6..4e2a716d79 100644
--- a/dlt/destinations/impl/filesystem/factory.py
+++ b/dlt/destinations/impl/filesystem/factory.py
@@ -28,4 +28,23 @@ def __init__(
         credentials: t.Union[FileSystemCredentials, t.Dict[str, t.Any], t.Any] = None,
         **kwargs: t.Any,
     ) -> None:
+        """Configure the filesystem destination to use in a pipeline and load data to local or remote filesystem.
+
+        All arguments provided here supersede other configuration sources such as environment variables and dlt config files.
+
+        The `bucket_url` determines the protocol to be used:
+
+        - Local folder: `file:///path/to/directory`
+        - AWS S3 (and S3 compatible storages): `s3://bucket-name`
+        - Azure Blob Storage: `az://container-name`
+        - Google Cloud Storage: `gs://bucket-name`
+        - Memory fs: `memory://m`
+
+        Args:
+            bucket_url: The fsspec compatible bucket url to use for the destination.
+            credentials: Credentials to connect to the filesystem. The type of credentials should correspond to
+                the bucket protocol. For example, for AWS S3, the credentials should be an instance of `AwsCredentials`.
+                A dictionary with the credentials parameters can also be provided.
+            **kwargs: Additional arguments passed to the destination config
+        """
         super().__init__(bucket_url=bucket_url, credentials=credentials, **kwargs)
diff --git a/dlt/destinations/impl/motherduck/factory.py b/dlt/destinations/impl/motherduck/factory.py
index 19f85e8f67..17cf4a76b4 100644
--- a/dlt/destinations/impl/motherduck/factory.py
+++ b/dlt/destinations/impl/motherduck/factory.py
@@ -28,4 +28,14 @@ def __init__(
         create_indexes: bool = False,
         **kwargs: t.Any,
     ) -> None:
+        """Configure the MotherDuck destination to use in a pipeline.
+
+        All arguments provided here supersede other configuration sources such as environment variables and dlt config files.
+
+        Args:
+            credentials: Credentials to connect to the MotherDuck database. Can be an instance of `MotherDuckCredentials` or
+                a connection string in the format `md:///<database_name>?token=<service_token>`
+            create_indexes: Should unique indexes be created
+            **kwargs: Additional arguments passed to the destination config
+        """
         super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs)
diff --git a/dlt/destinations/impl/mssql/factory.py b/dlt/destinations/impl/mssql/factory.py
index fd44568a9a..c98531ca79 100644
--- a/dlt/destinations/impl/mssql/factory.py
+++ b/dlt/destinations/impl/mssql/factory.py
@@ -28,4 +28,14 @@ def __init__(
         create_indexes: bool = True,
         **kwargs: t.Any,
     ) -> None:
+        """Configure the MsSql destination to use in a pipeline.
+
+        All arguments provided here supersede other configuration sources such as environment variables and dlt config files.
+
+        Args:
+            credentials: Credentials to connect to the mssql database. Can be an instance of `MsSqlCredentials` or
+                a connection string in the format `mssql://user:password@host:port/database`
+            create_indexes: Should unique indexes be created
+            **kwargs: Additional arguments passed to the destination config
+        """
         super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs)
diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py
index 139751b2c1..33971eb642 100644
--- a/dlt/destinations/impl/postgres/factory.py
+++ b/dlt/destinations/impl/postgres/factory.py
@@ -28,4 +28,14 @@ def __init__(
         create_indexes: bool = True,
         **kwargs: t.Any,
     ) -> None:
+        """Configure the Postgres destination to use in a pipeline.
+
+        All arguments provided here supersede other configuration sources such as environment variables and dlt config files.
+
+        Args:
+            credentials: Credentials to connect to the postgres database. Can be an instance of `PostgresCredentials` or
+                a connection string in the format `postgres://user:password@host:port/database`
+            create_indexes: Should unique indexes be created
+            **kwargs: Additional arguments passed to the destination config
+        """
         super().__init__(credentials=credentials, create_indexes=create_indexes, **kwargs)
diff --git a/dlt/destinations/impl/redshift/factory.py b/dlt/destinations/impl/redshift/factory.py
index 885930a6e9..7648b35851 100644
--- a/dlt/destinations/impl/redshift/factory.py
+++ b/dlt/destinations/impl/redshift/factory.py
@@ -29,6 +29,17 @@ def __init__(
         staging_iam_role: t.Optional[str] = None,
         **kwargs: t.Any,
     ) -> None:
+        """Configure the Redshift destination to use in a pipeline.
+
+        All arguments provided here supersede other configuration sources such as environment variables and dlt config files.
+
+        Args:
+            credentials: Credentials to connect to the redshift database.
Can be an instance of `RedshiftCredentials` or + a connection string in the format `redshift://user:password@host:port/database` + create_indexes: Should unique indexes be created + staging_iam_role: IAM role to use for staging data in S3 + **kwargs: Additional arguments passed to the destination config + """ super().__init__( credentials=credentials, create_indexes=create_indexes, staging_iam_role=staging_iam_role, **kwargs ) diff --git a/dlt/destinations/impl/snowflake/factory.py b/dlt/destinations/impl/snowflake/factory.py index 694ea7376c..1201f406b0 100644 --- a/dlt/destinations/impl/snowflake/factory.py +++ b/dlt/destinations/impl/snowflake/factory.py @@ -28,4 +28,14 @@ def __init__( keep_staged_files: bool = True, **kwargs: t.Any, ) -> None: + """Configure the Snowflake destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the snowflake database. Can be an instance of `SnowflakeCredentials` or + a connection string in the format `snowflake://user:password@host:port/database` + stage_name: Name of an existing stage to use for loading data. Default uses implicit stage per table + keep_staged_files: Whether to delete or keep staged files after loading + """ super().__init__(credentials=credentials, stage_name=stage_name, keep_staged_files=keep_staged_files, **kwargs) diff --git a/dlt/destinations/impl/weaviate/factory.py b/dlt/destinations/impl/weaviate/factory.py index 4bac1f1a60..b29d02b1a7 100644 --- a/dlt/destinations/impl/weaviate/factory.py +++ b/dlt/destinations/impl/weaviate/factory.py @@ -25,6 +25,23 @@ def client_class(self) -> t.Type["WeaviateClient"]: def __init__( self, credentials: t.Union[WeaviateCredentials, t.Dict[str, t.Any]] = None, + vectorizer: str = None, + module_config: t.Dict[str, t.Dict[str, str]] = None, **kwargs: t.Any, ) -> None: - super().__init__(credentials=credentials, **kwargs) + """Configure the Weaviate destination to use in a pipeline. + + All destination config parameters can be provided as arguments here and will supersede other config sources (such as dlt config files and environment variables). 
+ + Args: + credentials: Weaviate credentials containing URL, API key and optional headers + vectorizer: The name of the Weaviate vectorizer to use + module_config: The configuration for the Weaviate modules + **kwargs: Additional arguments forwarded to the destination config + """ + super().__init__( + credentials=credentials, + vectorizer=vectorizer, + module_config=module_config, + **kwargs + ) From fc92929f0a952b050d966109c12185d5eff3cb7f Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Fri, 17 Nov 2023 12:12:13 -0500 Subject: [PATCH 27/29] Fix tests --- dlt/common/destination/reference.py | 9 +++++++-- docs/website/docs/dlt-ecosystem/destinations/weaviate.md | 2 +- docs/website/docs/getting-started-snippets.py | 2 +- tests/common/test_destination.py | 4 ++-- tests/load/cases/fake_destination.py | 7 ++++++- tests/load/duckdb/test_duckdb_client.py | 2 +- tests/load/weaviate/test_pipeline.py | 2 +- 7 files changed, 19 insertions(+), 9 deletions(-) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 6a1dd26be2..1c3560cbbd 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -431,10 +431,15 @@ def from_reference(ref: TDestinationReferenceArg, credentials: Optional[Credenti try: factory: Type[Destination[DestinationClientConfiguration, JobClientBase]] = getattr(dest_module, attr_name) except AttributeError as e: - raise InvalidDestinationReference(ref) from e + raise UnknownDestinationModule(ref) from e if credentials: kwargs["credentials"] = credentials - return factory(**kwargs) + try: + dest = factory(**kwargs) + dest.spec + except Exception as e: + raise InvalidDestinationReference(ref) from e + return dest def client(self, schema: Schema, initial_config: TDestinationConfig = config.value) -> TDestinationClient: """Returns a configured instance of the destination's job client""" diff --git a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md index 8c626266a4..fe7dafc243 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md +++ b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md @@ -252,7 +252,7 @@ it will be normalized to: so your best course of action is to clean up the data yourself before loading and use default naming convention. 
Nevertheless you can configure the alternative in `config.toml`:
 ```toml
 [schema]
-naming="dlt.destinations.weaviate.ci_naming"
+naming="dlt.destinations.impl.weaviate.ci_naming"
 ```
 
 ## Additional destination options
diff --git a/docs/website/docs/getting-started-snippets.py b/docs/website/docs/getting-started-snippets.py
index c4bd789834..be21a7f757 100644
--- a/docs/website/docs/getting-started-snippets.py
+++ b/docs/website/docs/getting-started-snippets.py
@@ -290,7 +290,7 @@ def pdf_to_weaviate_snippet() -> None:
     import os
 
     import dlt
-    from dlt.destinations.weaviate import weaviate_adapter
+    from dlt.destinations.impl.weaviate import weaviate_adapter
     from PyPDF2 import PdfReader
 
 
diff --git a/tests/common/test_destination.py b/tests/common/test_destination.py
index 53cf8185d7..5483a95f45 100644
--- a/tests/common/test_destination.py
+++ b/tests/common/test_destination.py
@@ -11,7 +11,7 @@
 
 def test_import_unknown_destination() -> None:
     # standard destination
-    with pytest.raises(InvalidDestinationReference):
+    with pytest.raises(UnknownDestinationModule):
         Destination.from_reference("meltdb")
     # custom module
     with pytest.raises(UnknownDestinationModule):
@@ -20,7 +20,7 @@ def test_import_unknown_destination() -> None:
 
 def test_invalid_destination_reference() -> None:
     with pytest.raises(InvalidDestinationReference):
-        Destination.from_reference("tests.load.cases.fake_destination")
+        Destination.from_reference("tests.load.cases.fake_destination.not_a_destination")
 
 
 def test_import_all_destinations() -> None:
diff --git a/tests/load/cases/fake_destination.py b/tests/load/cases/fake_destination.py
index 152b2db918..016cc19020 100644
--- a/tests/load/cases/fake_destination.py
+++ b/tests/load/cases/fake_destination.py
@@ -1 +1,6 @@
-# module that is used to test wrong destination references
\ No newline at end of file
+# module that is used to test wrong destination references
+
+
+class not_a_destination:
+    def __init__(self, **kwargs) -> None:
+        pass
diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py
index f5d2f92a4c..ddfc681a84 100644
--- a/tests/load/duckdb/test_duckdb_client.py
+++ b/tests/load/duckdb/test_duckdb_client.py
@@ -153,7 +153,7 @@ def test_keeps_initial_db_path() -> None:
 
 def test_duckdb_database_delete() -> None:
     db_path = "_storage/path_test_quack.duckdb"
-    p = dlt.pipeline(pipeline_name="quack_pipeline", destination=duckdb(credentials=DuckDbCredentials(db_path)))
+    p = dlt.pipeline(pipeline_name="quack_pipeline", destination=duckdb(credentials=db_path))
     p.run([1, 2, 3], table_name="table", dataset_name="dataset")
     # attach the pipeline
     p = dlt.attach(pipeline_name="quack_pipeline")
diff --git a/tests/load/weaviate/test_pipeline.py b/tests/load/weaviate/test_pipeline.py
index a6376ba1bc..691281c63e 100644
--- a/tests/load/weaviate/test_pipeline.py
+++ b/tests/load/weaviate/test_pipeline.py
@@ -374,7 +374,7 @@ def test_vectorize_property_without_data() -> None:
     # set the naming convention to case insensitive
     # os.environ["SCHEMA__NAMING"] = "direct"
-    dlt.config["schema.naming"] = "dlt.destinations.weaviate.ci_naming"
+    dlt.config["schema.naming"] = "dlt.destinations.impl.weaviate.ci_naming"
     # create new schema with changed naming convention
     p = p.drop()
     info = p.run(weaviate_adapter(["there are", "no stop", "words in here"], vectorize="vAlue"), primary_key="vALue", columns={"vAlue": {"data_type": "text"}})
From 13ec6fbf13d5bde3b3c1a481ebf05e0390caae17 Mon Sep 17 00:00:00 2001
From: Steinthor Palsson
Date: Fri, 17 Nov 2023 12:38:05
-0500 Subject: [PATCH 28/29] Destination name in output --- tests/cli/test_pipeline_command.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cli/test_pipeline_command.py b/tests/cli/test_pipeline_command.py index 1ffc0c66aa..a12b4f1c07 100644 --- a/tests/cli/test_pipeline_command.py +++ b/tests/cli/test_pipeline_command.py @@ -44,7 +44,7 @@ def test_pipeline_command_operations(repo_dir: str, project_files: FileStorage) pipeline_command.pipeline_command("info", "chess_pipeline", None, 0) _out = buf.getvalue() # do we have duckdb destination - assert "dlt.destinations.duckdb" in _out + assert "destination: duckdb" in _out print(_out) with io.StringIO() as buf, contextlib.redirect_stdout(buf): From 1f16c8f0bfcfce5f60551f1aea0cba165ca006f3 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Fri, 17 Nov 2023 14:42:04 -0500 Subject: [PATCH 29/29] Correct exception in unknown destination test --- tests/pipeline/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 94320ef087..f1fbfefa08 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -16,7 +16,7 @@ from dlt.common.configuration.specs.gcp_credentials import GcpOAuthCredentials from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.capabilities import TLoaderFileFormat -from dlt.common.exceptions import DestinationHasFailedJobs, DestinationTerminalException, PipelineStateNotAvailable, InvalidDestinationReference +from dlt.common.exceptions import DestinationHasFailedJobs, DestinationTerminalException, PipelineStateNotAvailable, UnknownDestinationModule from dlt.common.pipeline import PipelineContext from dlt.common.runtime.collector import AliveCollector, EnlightenCollector, LogCollector, TqdmCollector from dlt.common.schema.utils import new_column, new_table @@ -165,7 +165,7 @@ def test_pipeline_context() -> None: def test_import_unknown_destination() -> None: - with pytest.raises(InvalidDestinationReference): + with pytest.raises(UnknownDestinationModule): dlt.pipeline(destination="!")
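
Taken together, this series replaces `dlt.pipeline(destination="duckdb", credentials=...)` with pre-configured destination factory instances. Below is a minimal usage sketch of the resulting API, based on the behavior exercised by the tests in patches 17, 21 and 25 above; the pipeline name, table name, environment value and database paths are illustrative only, not taken from the patches.

```python
import os

import dlt
from dlt.destinations import duckdb

# Env/config values fill in anything not passed explicitly to the factory;
# factory argument defaults are filtered out (patches 14 and 17), so they
# never shadow configuration resolved from the environment.
os.environ["DESTINATION__DUCKDB__CREDENTIALS"] = "duckdb:///env_configured.duckdb"

# An explicit factory argument supersedes the env value above (patch 21 tests).
dest = duckdb(credentials="duckdb:///explicit.duckdb", create_indexes=True)

# Passing the factory is preferred over the deprecated `credentials=`
# argument to dlt.pipeline (patch 25).
pipeline = dlt.pipeline(pipeline_name="factory_demo", destination=dest)
pipeline.run([1, 2, 3], table_name="numbers")
```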