From 157dc90804e0f560685bd25ceab182e46bad7357 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Tue, 12 Sep 2023 12:10:35 +0200 Subject: [PATCH] disables index creation by default, updates docs --- dlt/destinations/mssql/configuration.py | 2 +- .../docs/dlt-ecosystem/destinations/mssql.md | 48 ++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/dlt/destinations/mssql/configuration.py b/dlt/destinations/mssql/configuration.py index 88e9242f39..581da0f782 100644 --- a/dlt/destinations/mssql/configuration.py +++ b/dlt/destinations/mssql/configuration.py @@ -77,7 +77,7 @@ class MsSqlClientConfiguration(DestinationClientDwhWithStagingConfiguration): destination_name: Final[str] = "mssql" # type: ignore credentials: MsSqlCredentials - create_indexes: bool = True + create_indexes: bool = False def fingerprint(self) -> str: """Returns a fingerprint of host part of a connection string""" diff --git a/docs/website/docs/dlt-ecosystem/destinations/mssql.md b/docs/website/docs/dlt-ecosystem/destinations/mssql.md index af2b4037eb..4d77b1d30e 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/mssql.md +++ b/docs/website/docs/dlt-ecosystem/destinations/mssql.md @@ -15,6 +15,11 @@ This can't be included with `dlt`s python dependencies so you must installed it See instructions here to [install Microsoft ODBC Driver 18 for SQL Server on Windows, Mac and Linux](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16) +The following ODBC drivers are supported: +* ODBC Driver 18 for SQL Server +* ODBC Driver 17 for SQL Server +[You can configure the driver name explicitly](#additional-destination-options) as well. + ### Create a pipeline **1. Initalize a project with a pipeline that loads to MS SQL by running** @@ -22,7 +27,7 @@ See instructions here to [install Microsoft ODBC Driver 18 for SQL Server on Win dlt init chess mssql ``` -**2. Install the necessary dependencies for BigQuery by running** +**2. 
Install the necessary dependencies for MS SQL by running** ``` pip install -r requirements.txt ``` @@ -45,12 +50,51 @@ port = 1433 connect_timeout = 15 ``` +You can also pass a SQLAlchemy-like database connection string: +```toml +# keep it at the top of your toml file! before any section starts +destination.mssql.credentials="mssql://loader:<password>@loader.database.windows.net/dlt_data?connect_timeout=15" +``` + +To pass credentials directly, you can use the `credentials` argument of the `dlt.pipeline` or `pipeline.run` methods. +```python +pipeline = dlt.pipeline(pipeline_name='chess', destination='mssql', dataset_name='chess_data', credentials="mssql://loader:<password>@loader.database.windows.net/dlt_data?connect_timeout=15") +``` + +## Write disposition +All write dispositions are supported. + +If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized`, the destination tables will be dropped and +recreated with an `ALTER SCHEMA ... TRANSFER`. The operation is atomic: mssql supports DDL transactions. + +## Data loading +Data is loaded via INSERT statements by default. MSSQL has a limit of 1000 rows per INSERT, and this is the batch size we use. + ## Supported file formats * [insert-values](../file-formats/insert-format.md) is used by default +## Supported column hints +**mssql** can create unique indexes for all columns with `unique` hints. This behavior is **disabled by default**; see [Additional destination options](#additional-destination-options) to enable it. + ## Syncing of `dlt` state This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination) ## Data types - MS SQL does not support JSON columns, so JSON objects are stored as strings in `nvarchar` column. + +## Additional destination options +**mssql** destination **does not** create UNIQUE indexes by default on columns with the `unique` hint (i.e. `_dlt_id`). 
To enable this behavior: +```toml +[destination.mssql] +create_indexes=true +``` + +You can explicitly set the ODBC driver name: +```toml +[destination.mssql.credentials] +odbc_driver="ODBC Driver 18 for SQL Server" +``` + +### dbt support +No dbt support yet. +