From 7bc2163ff001b9a3299827e1d3ddf0da021f36d6 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 18 Jan 2024 01:18:56 +0100 Subject: [PATCH 01/23] Synapse destination initial commit --- .../workflows/test_destination_synapse.yml | 22 ++- dlt/common/data_writers/escape.py | 10 +- dlt/common/data_writers/writers.py | 24 +++- dlt/common/destination/capabilities.py | 1 + dlt/destinations/__init__.py | 2 + dlt/destinations/impl/mssql/configuration.py | 31 +++-- dlt/destinations/impl/mssql/sql_client.py | 7 +- dlt/destinations/impl/synapse/README.md | 58 ++++++++ dlt/destinations/impl/synapse/__init__.py | 46 +++++++ .../impl/synapse/configuration.py | 38 +++++ dlt/destinations/impl/synapse/factory.py | 51 +++++++ dlt/destinations/impl/synapse/sql_client.py | 28 ++++ dlt/destinations/impl/synapse/synapse.py | 99 +++++++++++++ dlt/destinations/insert_job_client.py | 18 ++- dlt/helpers/dbt/profiles.yml | 18 ++- poetry.lock | 3 +- pyproject.toml | 1 + tests/load/mssql/test_mssql_credentials.py | 69 +++++++--- tests/load/mssql/test_mssql_table_builder.py | 3 +- tests/load/pipeline/test_dbt_helper.py | 7 +- .../load/pipeline/test_replace_disposition.py | 6 + tests/load/synapse/__init__.py | 3 + .../synapse/test_synapse_configuration.py | 46 +++++++ .../synapse/test_synapse_table_builder.py | 130 ++++++++++++++++++ tests/load/test_job_client.py | 3 +- tests/load/test_sql_client.py | 12 +- tests/load/utils.py | 7 +- tests/utils.py | 1 + 28 files changed, 672 insertions(+), 72 deletions(-) create mode 100644 dlt/destinations/impl/synapse/README.md create mode 100644 dlt/destinations/impl/synapse/__init__.py create mode 100644 dlt/destinations/impl/synapse/configuration.py create mode 100644 dlt/destinations/impl/synapse/factory.py create mode 100644 dlt/destinations/impl/synapse/sql_client.py create mode 100644 dlt/destinations/impl/synapse/synapse.py create mode 100644 tests/load/synapse/__init__.py create mode 100644 tests/load/synapse/test_synapse_configuration.py create mode 100644 tests/load/synapse/test_synapse_table_builder.py diff --git a/.github/workflows/test_destination_synapse.yml b/.github/workflows/test_destination_synapse.yml index 83800fa789..ecd890d32a 100644 --- a/.github/workflows/test_destination_synapse.yml +++ b/.github/workflows/test_destination_synapse.yml @@ -5,7 +5,6 @@ on: branches: - master - devel - workflow_dispatch: env: @@ -18,19 +17,14 @@ env: ALL_FILESYSTEM_DRIVERS: "[\"memory\"]" jobs: - - build: - runs-on: ubuntu-latest - - steps: - - name: Check source branch name - run: | - if [[ "${{ github.head_ref }}" != "synapse" ]]; then - exit 1 - fi + get_docs_changes: + uses: ./.github/workflows/get_docs_changes.yml + if: ${{ !github.event.pull_request.head.repo.fork }} run_loader: name: Tests Synapse loader + needs: get_docs_changes + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' strategy: fail-fast: false matrix: @@ -69,17 +63,17 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp - name: Install dependencies - run: poetry install --no-interaction -E synapse -E s3 -E gs -E az --with sentry-sdk --with pipeline + run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline - name: create secrets.toml run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml - run: | - poetry run pytest tests/load --ignore tests/load/pipeline/test_dbt_helper.py + poetry run pytest tests/load if: runner.os != 'Windows' name: Run tests Linux/MAC - run: | - 
poetry run pytest tests/load --ignore tests/load/pipeline/test_dbt_helper.py + poetry run pytest tests/load if: runner.os == 'Windows' name: Run tests Windows shell: cmd diff --git a/dlt/common/data_writers/escape.py b/dlt/common/data_writers/escape.py index 5bf8f29ccb..b56a0d8f19 100644 --- a/dlt/common/data_writers/escape.py +++ b/dlt/common/data_writers/escape.py @@ -98,8 +98,14 @@ def escape_mssql_literal(v: Any) -> Any: json.dumps(v), prefix="N'", escape_dict=MS_SQL_ESCAPE_DICT, escape_re=MS_SQL_ESCAPE_RE ) if isinstance(v, bytes): - base_64_string = base64.b64encode(v).decode("ascii") - return f"""CAST('' AS XML).value('xs:base64Binary("{base_64_string}")', 'VARBINARY(MAX)')""" + # 8000 is the max value for n in VARBINARY(n) + # https://learn.microsoft.com/en-us/sql/t-sql/data-types/binary-and-varbinary-transact-sql + if len(v) <= 8000: + n = len(v) + else: + n = "MAX" + return f"CONVERT(VARBINARY({n}), '{v.hex()}', 2)" + if isinstance(v, bool): return str(int(v)) if v is None: diff --git a/dlt/common/data_writers/writers.py b/dlt/common/data_writers/writers.py index 0f9ff09259..0f3640da1e 100644 --- a/dlt/common/data_writers/writers.py +++ b/dlt/common/data_writers/writers.py @@ -175,18 +175,29 @@ def write_header(self, columns_schema: TTableSchemaColumns) -> None: # do not write INSERT INTO command, this must be added together with table name by the loader self._f.write("INSERT INTO {}(") self._f.write(",".join(map(self._caps.escape_identifier, headers))) - self._f.write(")\nVALUES\n") + if self._caps.insert_values_writer_type == "default": + self._f.write(")\nVALUES\n") + elif self._caps.insert_values_writer_type == "select_union": + self._f.write(")\n") def write_data(self, rows: Sequence[Any]) -> None: super().write_data(rows) - def write_row(row: StrAny) -> None: + def write_row(row: StrAny, last_row: bool = False) -> None: output = ["NULL"] * len(self._headers_lookup) for n, v in row.items(): output[self._headers_lookup[n]] = self._caps.escape_literal(v) - self._f.write("(") - self._f.write(",".join(output)) - self._f.write(")") + if self._caps.insert_values_writer_type == "default": + self._f.write("(") + self._f.write(",".join(output)) + self._f.write(")") + if not last_row: + self._f.write(",\n") + elif self._caps.insert_values_writer_type == "select_union": + self._f.write("SELECT ") + self._f.write(",".join(output)) + if not last_row: + self._f.write("\nUNION ALL\n") # if next chunk add separator if self._chunks_written > 0: @@ -195,10 +206,9 @@ def write_row(row: StrAny) -> None: # write rows for row in rows[:-1]: write_row(row) - self._f.write(",\n") # write last row without separator so we can write footer eventually - write_row(rows[-1]) + write_row(rows[-1], last_row=True) self._chunks_written += 1 def write_footer(self) -> None: diff --git a/dlt/common/destination/capabilities.py b/dlt/common/destination/capabilities.py index 2596b2bf99..08c7a31388 100644 --- a/dlt/common/destination/capabilities.py +++ b/dlt/common/destination/capabilities.py @@ -52,6 +52,7 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): schema_supports_numeric_precision: bool = True timestamp_precision: int = 6 max_rows_per_insert: Optional[int] = None + insert_values_writer_type: str = "default" # do not allow to create default value, destination caps must be always explicitly inserted into container can_create_default: ClassVar[bool] = False diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index 980c4ce7f2..775778cd4a 100644 --- 
a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -10,6 +10,7 @@ from dlt.destinations.impl.qdrant.factory import qdrant from dlt.destinations.impl.motherduck.factory import motherduck from dlt.destinations.impl.weaviate.factory import weaviate +from dlt.destinations.impl.synapse.factory import synapse __all__ = [ @@ -25,4 +26,5 @@ "qdrant", "motherduck", "weaviate", + "synapse", ] diff --git a/dlt/destinations/impl/mssql/configuration.py b/dlt/destinations/impl/mssql/configuration.py index f33aca4b82..f00998cfb2 100644 --- a/dlt/destinations/impl/mssql/configuration.py +++ b/dlt/destinations/impl/mssql/configuration.py @@ -1,4 +1,4 @@ -from typing import Final, ClassVar, Any, List, Optional, TYPE_CHECKING +from typing import Final, ClassVar, Any, List, Dict, Optional, TYPE_CHECKING from sqlalchemy.engine import URL from dlt.common.configuration import configspec @@ -10,9 +10,6 @@ from dlt.common.destination.reference import DestinationClientDwhWithStagingConfiguration -SUPPORTED_DRIVERS = ["ODBC Driver 18 for SQL Server", "ODBC Driver 17 for SQL Server"] - - @configspec class MsSqlCredentials(ConnectionStringCredentials): drivername: Final[str] = "mssql" # type: ignore @@ -24,22 +21,27 @@ class MsSqlCredentials(ConnectionStringCredentials): __config_gen_annotations__: ClassVar[List[str]] = ["port", "connect_timeout"] + SUPPORTED_DRIVERS: ClassVar[List[str]] = [ + "ODBC Driver 18 for SQL Server", + "ODBC Driver 17 for SQL Server", + ] + def parse_native_representation(self, native_value: Any) -> None: # TODO: Support ODBC connection string or sqlalchemy URL super().parse_native_representation(native_value) if self.query is not None: self.query = {k.lower(): v for k, v in self.query.items()} # Make case-insensitive. - if "driver" in self.query and self.query.get("driver") not in SUPPORTED_DRIVERS: - raise SystemConfigurationException( - f"""The specified driver "{self.query.get('driver')}" is not supported.""" - f" Choose one of the supported drivers: {', '.join(SUPPORTED_DRIVERS)}." - ) self.driver = self.query.get("driver", self.driver) self.connect_timeout = int(self.query.get("connect_timeout", self.connect_timeout)) if not self.is_partial(): self.resolve() def on_resolved(self) -> None: + if self.driver not in self.SUPPORTED_DRIVERS: + raise SystemConfigurationException( + f"""The specified driver "{self.driver}" is not supported.""" + f" Choose one of the supported drivers: {', '.join(self.SUPPORTED_DRIVERS)}." + ) self.database = self.database.lower() def to_url(self) -> URL: @@ -55,20 +57,21 @@ def on_partial(self) -> None: def _get_driver(self) -> str: if self.driver: return self.driver + # Pick a default driver if available import pyodbc available_drivers = pyodbc.drivers() - for d in SUPPORTED_DRIVERS: + for d in self.SUPPORTED_DRIVERS: if d in available_drivers: return d docs_url = "https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16" raise SystemConfigurationException( f"No supported ODBC driver found for MS SQL Server. See {docs_url} for information on" - f" how to install the '{SUPPORTED_DRIVERS[0]}' on your platform." + f" how to install the '{self.SUPPORTED_DRIVERS[0]}' on your platform." 
) - def to_odbc_dsn(self) -> str: + def _get_odbc_dsn_dict(self) -> Dict[str, Any]: params = { "DRIVER": self.driver, "SERVER": f"{self.host},{self.port}", @@ -78,6 +81,10 @@ def to_odbc_dsn(self) -> str: } if self.query is not None: params.update({k.upper(): v for k, v in self.query.items()}) + return params + + def to_odbc_dsn(self) -> str: + params = self._get_odbc_dsn_dict() return ";".join([f"{k}={v}" for k, v in params.items()]) diff --git a/dlt/destinations/impl/mssql/sql_client.py b/dlt/destinations/impl/mssql/sql_client.py index 427518feeb..2ddd56350e 100644 --- a/dlt/destinations/impl/mssql/sql_client.py +++ b/dlt/destinations/impl/mssql/sql_client.py @@ -106,8 +106,8 @@ def drop_dataset(self) -> None: ) table_names = [row[0] for row in rows] self.drop_tables(*table_names) - - self.execute_sql("DROP SCHEMA IF EXISTS %s;" % self.fully_qualified_dataset_name()) + # Drop schema + self._drop_schema() def _drop_views(self, *tables: str) -> None: if not tables: @@ -117,6 +117,9 @@ def _drop_views(self, *tables: str) -> None: ] self.execute_fragments(statements) + def _drop_schema(self) -> None: + self.execute_sql("DROP SCHEMA IF EXISTS %s;" % self.fully_qualified_dataset_name()) + def execute_sql( self, sql: AnyStr, *args: Any, **kwargs: Any ) -> Optional[Sequence[Sequence[Any]]]: diff --git a/dlt/destinations/impl/synapse/README.md b/dlt/destinations/impl/synapse/README.md new file mode 100644 index 0000000000..b133faf67a --- /dev/null +++ b/dlt/destinations/impl/synapse/README.md @@ -0,0 +1,58 @@ +# Set up loader user +Execute the following SQL statements to set up the [loader](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql/data-loading-best-practices#create-a-loading-user) user: +```sql +-- on master database + +CREATE LOGIN loader WITH PASSWORD = 'YOUR_LOADER_PASSWORD_HERE'; +``` + +```sql +-- on minipool database + +CREATE USER loader FOR LOGIN loader; + +-- DDL permissions +GRANT CREATE TABLE ON DATABASE :: minipool TO loader; +GRANT CREATE VIEW ON DATABASE :: minipool TO loader; + +-- DML permissions +GRANT SELECT ON DATABASE :: minipool TO loader; +GRANT INSERT ON DATABASE :: minipool TO loader; +GRANT ADMINISTER DATABASE BULK OPERATIONS TO loader; +``` + +```sql +-- https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-workload-isolation + +CREATE WORKLOAD GROUP DataLoads +WITH ( + MIN_PERCENTAGE_RESOURCE = 0 + ,CAP_PERCENTAGE_RESOURCE = 50 + ,REQUEST_MIN_RESOURCE_GRANT_PERCENT = 25 +); + +CREATE WORKLOAD CLASSIFIER [wgcELTLogin] +WITH ( + WORKLOAD_GROUP = 'DataLoads' + ,MEMBERNAME = 'loader' +); +``` + +# config.toml +```toml +[destination.synapse.credentials] +database = "minipool" +username = "loader" +host = "dlt-synapse-ci.sql.azuresynapse.net" +port = 1433 +driver = "ODBC Driver 18 for SQL Server" + +[destination.synapse] +create_indexes = false +``` + +# secrets.toml +```toml +[destination.synapse.credentials] +password = "YOUR_LOADER_PASSWORD_HERE" +``` \ No newline at end of file diff --git a/dlt/destinations/impl/synapse/__init__.py b/dlt/destinations/impl/synapse/__init__.py new file mode 100644 index 0000000000..175b011186 --- /dev/null +++ b/dlt/destinations/impl/synapse/__init__.py @@ -0,0 +1,46 @@ +from dlt.common.data_writers.escape import escape_postgres_identifier, escape_mssql_literal +from dlt.common.destination import DestinationCapabilitiesContext +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE +from dlt.common.wei import EVM_DECIMAL_PRECISION + + +def 
capabilities() -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext() + + caps.preferred_loader_file_format = "insert_values" + caps.supported_loader_file_formats = ["insert_values"] + caps.preferred_staging_file_format = None + caps.supported_staging_file_formats = [] + + caps.insert_values_writer_type = "select_union" # https://stackoverflow.com/a/77014299 + + caps.escape_identifier = escape_postgres_identifier + caps.escape_literal = escape_mssql_literal + + # Synapse has a max precision of 38 + # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#DataTypes + caps.decimal_precision = (DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE) + caps.wei_precision = (DEFAULT_NUMERIC_PRECISION, 0) + + # https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse?view=aps-pdw-2016-au7#LimitationsRestrictions + caps.max_identifier_length = 128 + caps.max_column_identifier_length = 128 + + # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-service-capacity-limits#queries + caps.max_query_length = 65536 * 4096 + caps.is_max_query_length_in_bytes = True + + # nvarchar(max) can store 2 GB + # https://learn.microsoft.com/en-us/sql/t-sql/data-types/nchar-and-nvarchar-transact-sql?view=sql-server-ver16#nvarchar---n--max-- + caps.max_text_data_type_length = 2 * 1024 * 1024 * 1024 + caps.is_max_text_data_type_length_in_bytes = True + + # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-develop-transactions + caps.supports_transactions = True + caps.supports_ddl_transactions = False + + # datetimeoffset can store 7 digits for fractional seconds + # https://learn.microsoft.com/en-us/sql/t-sql/data-types/datetimeoffset-transact-sql?view=sql-server-ver16 + caps.timestamp_precision = 7 + + return caps diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py new file mode 100644 index 0000000000..0596cc2c46 --- /dev/null +++ b/dlt/destinations/impl/synapse/configuration.py @@ -0,0 +1,38 @@ +from typing import Final, Any, List, Dict, Optional, ClassVar + +from dlt.common.configuration import configspec + +from dlt.destinations.impl.mssql.configuration import ( + MsSqlCredentials, + MsSqlClientConfiguration, +) +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials + + +@configspec +class SynapseCredentials(MsSqlCredentials): + drivername: Final[str] = "synapse" # type: ignore + + # LongAsMax keyword got introduced in ODBC Driver 18 for SQL Server. + SUPPORTED_DRIVERS: ClassVar[List[str]] = ["ODBC Driver 18 for SQL Server"] + + def _get_odbc_dsn_dict(self) -> Dict[str, Any]: + params = super()._get_odbc_dsn_dict() + # Long types (text, ntext, image) are not supported on Synapse. + # Convert to max types using LongAsMax keyword. + # https://stackoverflow.com/a/57926224 + params["LONGASMAX"] = "yes" + return params + + +@configspec +class SynapseClientConfiguration(MsSqlClientConfiguration): + destination_type: Final[str] = "synapse" # type: ignore + credentials: SynapseCredentials + + # Determines if `primary_key` and `unique` column hints are applied. + # Set to False by default because the PRIMARY KEY and UNIQUE constraints + # are tricky in Synapse: they are NOT ENFORCED and can lead to innacurate + # results if the user does not ensure all column values are unique. 
+ # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints + create_indexes: bool = False diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py new file mode 100644 index 0000000000..fa7facc0ca --- /dev/null +++ b/dlt/destinations/impl/synapse/factory.py @@ -0,0 +1,51 @@ +import typing as t + +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.impl.synapse import capabilities + +from dlt.destinations.impl.synapse.configuration import ( + SynapseCredentials, + SynapseClientConfiguration, +) + +if t.TYPE_CHECKING: + from dlt.destinations.impl.synapse.synapse import SynapseClient + + +class synapse(Destination[SynapseClientConfiguration, "SynapseClient"]): + spec = SynapseClientConfiguration + + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities() + + @property + def client_class(self) -> t.Type["SynapseClient"]: + from dlt.destinations.impl.synapse.synapse import SynapseClient + + return SynapseClient + + def __init__( + self, + credentials: t.Union[SynapseCredentials, t.Dict[str, t.Any], str] = None, + create_indexes: bool = False, + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + **kwargs: t.Any, + ) -> None: + """Configure the Synapse destination to use in a pipeline. + + All arguments provided here supersede other configuration sources such as environment variables and dlt config files. + + Args: + credentials: Credentials to connect to the Synapse dedicated pool. Can be an instance of `SynapseCredentials` or + a connection string in the format `synapse://user:password@host:port/database` + create_indexes: Should unique indexes be created, defaults to False + **kwargs: Additional arguments passed to the destination config + """ + super().__init__( + credentials=credentials, + create_indexes=create_indexes, + destination_name=destination_name, + environment=environment, + **kwargs, + ) diff --git a/dlt/destinations/impl/synapse/sql_client.py b/dlt/destinations/impl/synapse/sql_client.py new file mode 100644 index 0000000000..089c58e57c --- /dev/null +++ b/dlt/destinations/impl/synapse/sql_client.py @@ -0,0 +1,28 @@ +from typing import ClassVar +from contextlib import suppress + +from dlt.common.destination import DestinationCapabilitiesContext + +from dlt.destinations.impl.mssql.sql_client import PyOdbcMsSqlClient +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials +from dlt.destinations.impl.synapse import capabilities +from dlt.destinations.impl.synapse.configuration import SynapseCredentials + +from dlt.destinations.exceptions import DatabaseUndefinedRelation + + +class SynapseSqlClient(PyOdbcMsSqlClient): + capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() + + def drop_tables(self, *tables: str) -> None: + if not tables: + return + # Synapse does not support DROP TABLE IF EXISTS. + # Workaround: use DROP TABLE and suppress non-existence errors. + statements = [f"DROP TABLE {self.make_qualified_table_name(table)};" for table in tables] + with suppress(DatabaseUndefinedRelation): + self.execute_fragments(statements) + + def _drop_schema(self) -> None: + # Synapse does not support DROP SCHEMA IF EXISTS. 
+ self.execute_sql("DROP SCHEMA %s;" % self.fully_qualified_dataset_name()) diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py new file mode 100644 index 0000000000..18d1fa81d4 --- /dev/null +++ b/dlt/destinations/impl/synapse/synapse.py @@ -0,0 +1,99 @@ +from typing import ClassVar, Sequence, List, Dict, Any, Optional +from copy import deepcopy + +from dlt.common.destination import DestinationCapabilitiesContext +from dlt.common.destination.reference import SupportsStagingDestination, NewLoadJob + +from dlt.common.schema import TTableSchema, TColumnSchema, Schema, TColumnHint +from dlt.common.schema.typing import TTableSchemaColumns + +from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlJobParams +from dlt.destinations.sql_client import SqlClientBase +from dlt.destinations.insert_job_client import InsertValuesJobClient +from dlt.destinations.job_client_impl import SqlJobClientBase + +from dlt.destinations.impl.mssql.mssql import MsSqlTypeMapper, MsSqlClient, HINT_TO_MSSQL_ATTR + +from dlt.destinations.impl.synapse import capabilities +from dlt.destinations.impl.synapse.sql_client import SynapseSqlClient +from dlt.destinations.impl.synapse.configuration import SynapseClientConfiguration + + +HINT_TO_SYNAPSE_ATTR: Dict[TColumnHint, str] = { + "primary_key": "PRIMARY KEY NONCLUSTERED NOT ENFORCED", + "unique": "UNIQUE NOT ENFORCED", +} + + +class SynapseClient(MsSqlClient, SupportsStagingDestination): + capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() + + def __init__(self, schema: Schema, config: SynapseClientConfiguration) -> None: + sql_client = SynapseSqlClient(config.normalize_dataset_name(schema), config.credentials) + InsertValuesJobClient.__init__(self, schema, config, sql_client) + self.config: SynapseClientConfiguration = config + self.sql_client = sql_client + self.type_mapper = MsSqlTypeMapper(self.capabilities) + + self.active_hints = deepcopy(HINT_TO_SYNAPSE_ATTR) + if not self.config.create_indexes: + self.active_hints.pop("primary_key", None) + self.active_hints.pop("unique", None) + + def _get_table_update_sql( + self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool + ) -> List[str]: + _sql_result = SqlJobClientBase._get_table_update_sql( + self, table_name, new_columns, generate_alter + ) + if not generate_alter: + # Append WITH clause to create heap table instead of default + # columnstore table. Heap tables are a more robust choice, because + # columnstore tables do not support varchar(max), nvarchar(max), + # and varbinary(max). 
+ # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index + sql_result = [_sql_result[0] + "\n WITH ( HEAP );"] + else: + sql_result = _sql_result + return sql_result + + def _create_replace_followup_jobs( + self, table_chain: Sequence[TTableSchema] + ) -> List[NewLoadJob]: + if self.config.replace_strategy == "staging-optimized": + return [SynapseStagingCopyJob.from_table_chain(table_chain, self.sql_client)] + return super()._create_replace_followup_jobs(table_chain) + + +class SynapseStagingCopyJob(SqlStagingCopyJob): + @classmethod + def generate_sql( + cls, + table_chain: Sequence[TTableSchema], + sql_client: SqlClientBase[Any], + params: Optional[SqlJobParams] = None, + ) -> List[str]: + sql: List[str] = [] + for table in table_chain: + with sql_client.with_staging_dataset(staging=True): + staging_table_name = sql_client.make_qualified_table_name(table["name"]) + table_name = sql_client.make_qualified_table_name(table["name"]) + # drop destination table + sql.append(f"DROP TABLE {table_name};") + # moving staging table to destination schema + sql.append( + f"ALTER SCHEMA {sql_client.fully_qualified_dataset_name()} TRANSFER" + f" {staging_table_name};" + ) + # recreate staging table + # In some cases, when multiple instances of this CTAS query are + # executed concurrently, Synapse suspends the queries and hangs. + # This can be prevented by setting the env var LOAD__WORKERS = "1". + sql.append( + f"CREATE TABLE {staging_table_name}" + " WITH ( DISTRIBUTION = ROUND_ROBIN, HEAP )" # distribution must be explicitly specified with CTAS + f" AS SELECT * FROM {table_name}" + " WHERE 1 = 0;" # no data, table structure only + ) + + return sql diff --git a/dlt/destinations/insert_job_client.py b/dlt/destinations/insert_job_client.py index 678ba43bcc..776176078e 100644 --- a/dlt/destinations/insert_job_client.py +++ b/dlt/destinations/insert_job_client.py @@ -36,9 +36,10 @@ def _insert(self, qualified_table_name: str, file_path: str) -> Iterator[List[st # the procedure below will split the inserts into max_query_length // 2 packs with FileStorage.open_zipsafe_ro(file_path, "r", encoding="utf-8") as f: header = f.readline() - values_mark = f.readline() - # properly formatted file has a values marker at the beginning - assert values_mark == "VALUES\n" + if self._sql_client.capabilities.insert_values_writer_type == "default": + # properly formatted file has a values marker at the beginning + values_mark = f.readline() + assert values_mark == "VALUES\n" max_rows = self._sql_client.capabilities.max_rows_per_insert @@ -67,7 +68,9 @@ def _insert(self, qualified_table_name: str, file_path: str) -> Iterator[List[st # Chunk by max_rows - 1 for simplicity because one more row may be added for chunk in chunks(values_rows, max_rows - 1): processed += len(chunk) - insert_sql.extend([header.format(qualified_table_name), values_mark]) + insert_sql.append(header.format(qualified_table_name)) + if self._sql_client.capabilities.insert_values_writer_type == "default": + insert_sql.append(values_mark) if processed == len_rows: # On the last chunk we need to add the extra row read insert_sql.append("".join(chunk) + until_nl) @@ -76,7 +79,12 @@ def _insert(self, qualified_table_name: str, file_path: str) -> Iterator[List[st insert_sql.append("".join(chunk).strip()[:-1] + ";\n") else: # otherwise write all content in a single INSERT INTO - insert_sql.extend([header.format(qualified_table_name), values_mark, content]) + if 
self._sql_client.capabilities.insert_values_writer_type == "default": + insert_sql.extend( + [header.format(qualified_table_name), values_mark, content] + ) + elif self._sql_client.capabilities.insert_values_writer_type == "select_union": + insert_sql.extend([header.format(qualified_table_name), content]) if until_nl: insert_sql.append(until_nl) diff --git a/dlt/helpers/dbt/profiles.yml b/dlt/helpers/dbt/profiles.yml index 2414222cbd..7031f5de2c 100644 --- a/dlt/helpers/dbt/profiles.yml +++ b/dlt/helpers/dbt/profiles.yml @@ -141,4 +141,20 @@ athena: schema: "{{ var('destination_dataset_name', var('source_dataset_name')) }}" database: "{{ env_var('DLT__AWS_DATA_CATALOG') }}" # aws_profile_name: "{{ env_var('DLT__CREDENTIALS__PROFILE_NAME', '') }}" - work_group: "{{ env_var('DLT__ATHENA_WORK_GROUP', '') }}" \ No newline at end of file + work_group: "{{ env_var('DLT__ATHENA_WORK_GROUP', '') }}" + + +# commented out because dbt for Synapse isn't currently properly supported. +# Leave config here for potential future use. +# synapse: +# target: analytics +# outputs: +# analytics: +# type: synapse +# driver: "{{ env_var('DLT__CREDENTIALS__DRIVER') }}" +# server: "{{ env_var('DLT__CREDENTIALS__HOST') }}" +# port: "{{ env_var('DLT__CREDENTIALS__PORT') | as_number }}" +# database: "{{ env_var('DLT__CREDENTIALS__DATABASE') }}" +# schema: "{{ var('destination_dataset_name', var('source_dataset_name')) }}" +# user: "{{ env_var('DLT__CREDENTIALS__USERNAME') }}" +# password: "{{ env_var('DLT__CREDENTIALS__PASSWORD') }}" \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index c5da40c604..4d079fc44d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8466,9 +8466,10 @@ qdrant = ["qdrant-client"] redshift = ["psycopg2-binary", "psycopg2cffi"] s3 = ["botocore", "s3fs"] snowflake = ["snowflake-connector-python"] +synapse = ["pyodbc"] weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "cf751b2e1e9c66efde0a11774b5204e3206a14fd04ba4c79b2d37e38db5367ad" +content-hash = "26c595a857f17a5cbdb348f165c267d8910412325be4e522d0e91224c7fec588" diff --git a/pyproject.toml b/pyproject.toml index 6436ec23a7..d9d5858674 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,7 @@ cli = ["pipdeptree", "cron-descriptor"] athena = ["pyathena", "pyarrow", "s3fs", "botocore"] weaviate = ["weaviate-client"] mssql = ["pyodbc"] +synapse = ["pyodbc"] qdrant = ["qdrant-client"] [tool.poetry.scripts] diff --git a/tests/load/mssql/test_mssql_credentials.py b/tests/load/mssql/test_mssql_credentials.py index 0098d228f1..0e38791f22 100644 --- a/tests/load/mssql/test_mssql_credentials.py +++ b/tests/load/mssql/test_mssql_credentials.py @@ -1,18 +1,35 @@ import pyodbc import pytest -from dlt.common.configuration import resolve_configuration +from dlt.common.configuration import resolve_configuration, ConfigFieldMissingException from dlt.common.exceptions import SystemConfigurationException -from dlt.destinations.impl.mssql.configuration import MsSqlCredentials, SUPPORTED_DRIVERS +from dlt.destinations.impl.mssql.configuration import MsSqlCredentials -def test_parse_native_representation_unsupported_driver_specified() -> None: +def test_mssql_credentials_defaults() -> None: + creds = MsSqlCredentials() + assert creds.port == 1433 + assert creds.connect_timeout == 15 + assert MsSqlCredentials.__config_gen_annotations__ == ["port", "connect_timeout"] + # port should be optional + resolve_configuration(creds, explicit_value="mssql://loader:loader@localhost/dlt_data") 
+ assert creds.port == 1433 + + +def test_parse_native_representation() -> None: # Case: unsupported driver specified. with pytest.raises(SystemConfigurationException): resolve_configuration( MsSqlCredentials( - "mssql://test_user:test_password@sql.example.com:12345/test_db?DRIVER=foo" + "mssql://test_user:test_pwd@sql.example.com/test_db?DRIVER=ODBC+Driver+13+for+SQL+Server" + ) + ) + # Case: password not specified. + with pytest.raises(ConfigFieldMissingException): + resolve_configuration( + MsSqlCredentials( + "mssql://test_user@sql.example.com/test_db?DRIVER=ODBC+Driver+18+for+SQL+Server" ) ) @@ -21,33 +38,49 @@ def test_to_odbc_dsn_supported_driver_specified() -> None: # Case: supported driver specified — ODBC Driver 18 for SQL Server. creds = resolve_configuration( MsSqlCredentials( - "mssql://test_user:test_password@sql.example.com:12345/test_db?DRIVER=ODBC+Driver+18+for+SQL+Server" + "mssql://test_user:test_pwd@sql.example.com/test_db?DRIVER=ODBC+Driver+18+for+SQL+Server" ) ) dsn = creds.to_odbc_dsn() result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} assert result == { "DRIVER": "ODBC Driver 18 for SQL Server", - "SERVER": "sql.example.com,12345", + "SERVER": "sql.example.com,1433", "DATABASE": "test_db", "UID": "test_user", - "PWD": "test_password", + "PWD": "test_pwd", } # Case: supported driver specified — ODBC Driver 17 for SQL Server. creds = resolve_configuration( MsSqlCredentials( - "mssql://test_user:test_password@sql.example.com:12345/test_db?DRIVER=ODBC+Driver+17+for+SQL+Server" + "mssql://test_user:test_pwd@sql.example.com/test_db?DRIVER=ODBC+Driver+17+for+SQL+Server" ) ) dsn = creds.to_odbc_dsn() result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} assert result == { "DRIVER": "ODBC Driver 17 for SQL Server", + "SERVER": "sql.example.com,1433", + "DATABASE": "test_db", + "UID": "test_user", + "PWD": "test_pwd", + } + + # Case: port and supported driver specified. + creds = resolve_configuration( + MsSqlCredentials( + "mssql://test_user:test_pwd@sql.example.com:12345/test_db?DRIVER=ODBC+Driver+18+for+SQL+Server" + ) + ) + dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} + assert result == { + "DRIVER": "ODBC Driver 18 for SQL Server", "SERVER": "sql.example.com,12345", "DATABASE": "test_db", "UID": "test_user", - "PWD": "test_password", + "PWD": "test_pwd", } @@ -55,7 +88,7 @@ def test_to_odbc_dsn_arbitrary_keys_specified() -> None: # Case: arbitrary query keys (and supported driver) specified. creds = resolve_configuration( MsSqlCredentials( - "mssql://test_user:test_password@sql.example.com:12345/test_db?FOO=a&BAR=b&DRIVER=ODBC+Driver+18+for+SQL+Server" + "mssql://test_user:test_pwd@sql.example.com:12345/test_db?FOO=a&BAR=b&DRIVER=ODBC+Driver+18+for+SQL+Server" ) ) dsn = creds.to_odbc_dsn() @@ -65,7 +98,7 @@ def test_to_odbc_dsn_arbitrary_keys_specified() -> None: "SERVER": "sql.example.com,12345", "DATABASE": "test_db", "UID": "test_user", - "PWD": "test_password", + "PWD": "test_pwd", "FOO": "a", "BAR": "b", } @@ -73,7 +106,7 @@ def test_to_odbc_dsn_arbitrary_keys_specified() -> None: # Case: arbitrary capitalization. 
creds = resolve_configuration( MsSqlCredentials( - "mssql://test_user:test_password@sql.example.com:12345/test_db?FOO=a&bar=b&Driver=ODBC+Driver+18+for+SQL+Server" + "mssql://test_user:test_pwd@sql.example.com:12345/test_db?FOO=a&bar=b&Driver=ODBC+Driver+18+for+SQL+Server" ) ) dsn = creds.to_odbc_dsn() @@ -83,30 +116,30 @@ def test_to_odbc_dsn_arbitrary_keys_specified() -> None: "SERVER": "sql.example.com,12345", "DATABASE": "test_db", "UID": "test_user", - "PWD": "test_password", + "PWD": "test_pwd", "FOO": "a", "BAR": "b", } -available_drivers = [d for d in pyodbc.drivers() if d in SUPPORTED_DRIVERS] +available_drivers = [d for d in pyodbc.drivers() if d in MsSqlCredentials.SUPPORTED_DRIVERS] @pytest.mark.skipif(not available_drivers, reason="no supported driver available") def test_to_odbc_dsn_driver_not_specified() -> None: # Case: driver not specified, but supported driver is available. creds = resolve_configuration( - MsSqlCredentials("mssql://test_user:test_password@sql.example.com:12345/test_db") + MsSqlCredentials("mssql://test_user:test_pwd@sql.example.com/test_db") ) dsn = creds.to_odbc_dsn() result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} assert result in [ { "DRIVER": d, - "SERVER": "sql.example.com,12345", + "SERVER": "sql.example.com,1433", "DATABASE": "test_db", "UID": "test_user", - "PWD": "test_password", + "PWD": "test_pwd", } - for d in SUPPORTED_DRIVERS + for d in MsSqlCredentials.SUPPORTED_DRIVERS ] diff --git a/tests/load/mssql/test_mssql_table_builder.py b/tests/load/mssql/test_mssql_table_builder.py index f7e0ce53ff..039ce99113 100644 --- a/tests/load/mssql/test_mssql_table_builder.py +++ b/tests/load/mssql/test_mssql_table_builder.py @@ -1,11 +1,10 @@ import pytest -from copy import deepcopy import sqlfluff from dlt.common.utils import uniq_id from dlt.common.schema import Schema -pytest.importorskip("dlt.destinations.mssql.mssql", reason="MSSQL ODBC driver not installed") +pytest.importorskip("dlt.destinations.impl.mssql.mssql", reason="MSSQL ODBC driver not installed") from dlt.destinations.impl.mssql.mssql import MsSqlClient from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration, MsSqlCredentials diff --git a/tests/load/pipeline/test_dbt_helper.py b/tests/load/pipeline/test_dbt_helper.py index 11f59d5276..e919409311 100644 --- a/tests/load/pipeline/test_dbt_helper.py +++ b/tests/load/pipeline/test_dbt_helper.py @@ -37,6 +37,8 @@ def test_run_jaffle_package( pytest.skip( "dbt-athena requires database to be created and we don't do it in case of Jaffle" ) + if not destination_config.supports_dbt: + pytest.skip("dbt is not supported for this destination configuration") pipeline = destination_config.setup_pipeline("jaffle_jaffle", full_refresh=True) # get runner, pass the env from fixture dbt = dlt.dbt.package(pipeline, "https://github.com/dbt-labs/jaffle_shop.git", venv=dbt_venv) @@ -55,9 +57,10 @@ def test_run_jaffle_package( assert all(r.status == "pass" for r in tests) # get and display dataframe with customers - customers = select_data(pipeline, "SELECT * FROM customers") + qual_name = pipeline.sql_client().make_qualified_table_name + customers = select_data(pipeline, f"SELECT * FROM {qual_name('customers')}") assert len(customers) == 100 - orders = select_data(pipeline, "SELECT * FROM orders") + orders = select_data(pipeline, f"SELECT * FROM {qual_name('orders')}") assert len(orders) == 99 diff --git a/tests/load/pipeline/test_replace_disposition.py b/tests/load/pipeline/test_replace_disposition.py index 
c6db91efff..1dde56a6b1 100644 --- a/tests/load/pipeline/test_replace_disposition.py +++ b/tests/load/pipeline/test_replace_disposition.py @@ -264,6 +264,12 @@ def test_replace_table_clearing( # use staging tables for replace os.environ["DESTINATION__REPLACE_STRATEGY"] = replace_strategy + if destination_config.destination == "synapse" and replace_strategy == "staging-optimized": + # The "staging-optimized" replace strategy makes Synapse suspend the CTAS + # queries used to recreate the staging table, and hang, when the number + # of load workers is greater than 1. + os.environ["LOAD__WORKERS"] = "1" + pipeline = destination_config.setup_pipeline( "test_replace_table_clearing", dataset_name="test_replace_table_clearing", full_refresh=True ) diff --git a/tests/load/synapse/__init__.py b/tests/load/synapse/__init__.py new file mode 100644 index 0000000000..34119d38cb --- /dev/null +++ b/tests/load/synapse/__init__.py @@ -0,0 +1,3 @@ +from tests.utils import skip_if_not_active + +skip_if_not_active("synapse") diff --git a/tests/load/synapse/test_synapse_configuration.py b/tests/load/synapse/test_synapse_configuration.py new file mode 100644 index 0000000000..4055cbab38 --- /dev/null +++ b/tests/load/synapse/test_synapse_configuration.py @@ -0,0 +1,46 @@ +import pytest + +from dlt.common.configuration import resolve_configuration +from dlt.common.exceptions import SystemConfigurationException + +from dlt.destinations.impl.synapse.configuration import ( + SynapseClientConfiguration, + SynapseCredentials, +) + + +def test_synapse_configuration() -> None: + # By default, unique indexes should not be created. + assert SynapseClientConfiguration().create_indexes is False + + +def test_parse_native_representation() -> None: + # Case: unsupported driver specified. + with pytest.raises(SystemConfigurationException): + resolve_configuration( + SynapseCredentials( + "synapse://test_user:test_pwd@test.sql.azuresynapse.net/test_db?DRIVER=ODBC+Driver+17+for+SQL+Server" + ) + ) + + +def test_to_odbc_dsn_longasmax() -> None: + # Case: LONGASMAX not specified in query (this is the expected scenario). + creds = resolve_configuration( + SynapseCredentials( + "synapse://test_user:test_pwd@test.sql.azuresynapse.net/test_db?DRIVER=ODBC+Driver+18+for+SQL+Server" + ) + ) + dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} + assert result["LONGASMAX"] == "yes" + + # Case: LONGASMAX specified in query; specified value should be overridden. 
+ creds = resolve_configuration( + SynapseCredentials( + "synapse://test_user:test_pwd@test.sql.azuresynapse.net/test_db?DRIVER=ODBC+Driver+18+for+SQL+Server&LONGASMAX=no" + ) + ) + dsn = creds.to_odbc_dsn() + result = {k: v for k, v in (param.split("=") for param in dsn.split(";"))} + assert result["LONGASMAX"] == "yes" diff --git a/tests/load/synapse/test_synapse_table_builder.py b/tests/load/synapse/test_synapse_table_builder.py new file mode 100644 index 0000000000..f58a7d5883 --- /dev/null +++ b/tests/load/synapse/test_synapse_table_builder.py @@ -0,0 +1,130 @@ +import os +import pytest +import sqlfluff +from copy import deepcopy +from sqlfluff.api.simple import APIParsingError + +from dlt.common.utils import uniq_id +from dlt.common.schema import Schema, TColumnHint + +from dlt.destinations.impl.synapse.synapse import SynapseClient +from dlt.destinations.impl.synapse.configuration import ( + SynapseClientConfiguration, + SynapseCredentials, +) + +from tests.load.utils import TABLE_UPDATE +from dlt.destinations.impl.synapse.synapse import HINT_TO_SYNAPSE_ATTR + + +@pytest.fixture +def schema() -> Schema: + return Schema("event") + + +@pytest.fixture +def client(schema: Schema) -> SynapseClient: + # return client without opening connection + client = SynapseClient( + schema, + SynapseClientConfiguration( + dataset_name="test_" + uniq_id(), credentials=SynapseCredentials() + ), + ) + assert client.config.create_indexes is False + return client + + +@pytest.fixture +def client_with_indexes_enabled(schema: Schema) -> SynapseClient: + # return client without opening connection + client = SynapseClient( + schema, + SynapseClientConfiguration( + dataset_name="test_" + uniq_id(), credentials=SynapseCredentials(), create_indexes=True + ), + ) + assert client.config.create_indexes is True + return client + + +def test_create_table(client: SynapseClient) -> None: + # non existing table + sql = client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)[0] + sqlfluff.parse(sql, dialect="tsql") + assert "event_test_table" in sql + assert '"col1" bigint NOT NULL' in sql + assert '"col2" float NOT NULL' in sql + assert '"col3" bit NOT NULL' in sql + assert '"col4" datetimeoffset NOT NULL' in sql + assert '"col5" nvarchar(max) NOT NULL' in sql + assert '"col6" decimal(38,9) NOT NULL' in sql + assert '"col7" varbinary(max) NOT NULL' in sql + assert '"col8" decimal(38,0)' in sql + assert '"col9" nvarchar(max) NOT NULL' in sql + assert '"col10" date NOT NULL' in sql + assert '"col11" time NOT NULL' in sql + assert '"col1_precision" smallint NOT NULL' in sql + assert '"col4_precision" datetimeoffset(3) NOT NULL' in sql + assert '"col5_precision" nvarchar(25)' in sql + assert '"col6_precision" decimal(6,2) NOT NULL' in sql + assert '"col7_precision" varbinary(19)' in sql + assert '"col11_precision" time(3) NOT NULL' in sql + assert "WITH ( HEAP )" in sql + + +def test_alter_table(client: SynapseClient) -> None: + # existing table has no columns + sql = client._get_table_update_sql("event_test_table", TABLE_UPDATE, True)[0] + sqlfluff.parse(sql, dialect="tsql") + canonical_name = client.sql_client.make_qualified_table_name("event_test_table") + assert sql.count(f"ALTER TABLE {canonical_name}\nADD") == 1 + assert "event_test_table" in sql + assert '"col1" bigint NOT NULL' in sql + assert '"col2" float NOT NULL' in sql + assert '"col3" bit NOT NULL' in sql + assert '"col4" datetimeoffset NOT NULL' in sql + assert '"col5" nvarchar(max) NOT NULL' in sql + assert '"col6" decimal(38,9) NOT NULL' in sql 
+ assert '"col7" varbinary(max) NOT NULL' in sql + assert '"col8" decimal(38,0)' in sql + assert '"col9" nvarchar(max) NOT NULL' in sql + assert '"col10" date NOT NULL' in sql + assert '"col11" time NOT NULL' in sql + assert '"col1_precision" smallint NOT NULL' in sql + assert '"col4_precision" datetimeoffset(3) NOT NULL' in sql + assert '"col5_precision" nvarchar(25)' in sql + assert '"col6_precision" decimal(6,2) NOT NULL' in sql + assert '"col7_precision" varbinary(19)' in sql + assert '"col11_precision" time(3) NOT NULL' in sql + assert "WITH ( HEAP )" not in sql + + +@pytest.mark.parametrize("hint", ["primary_key", "unique"]) +def test_create_table_with_column_hint( + client: SynapseClient, client_with_indexes_enabled: SynapseClient, hint: TColumnHint +) -> None: + attr = HINT_TO_SYNAPSE_ATTR[hint] + + # Case: table without hint. + sql = client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)[0] + sqlfluff.parse(sql, dialect="tsql") + assert f" {attr} " not in sql + + # Case: table with hint, but client does not have indexes enabled. + mod_update = deepcopy(TABLE_UPDATE) + mod_update[0][hint] = True # type: ignore[typeddict-unknown-key] + sql = client._get_table_update_sql("event_test_table", mod_update, False)[0] + sqlfluff.parse(sql, dialect="tsql") + assert f" {attr} " not in sql + + # Case: table with hint, client has indexes enabled. + sql = client_with_indexes_enabled._get_table_update_sql("event_test_table", mod_update, False)[ + 0 + ] + # We expect an error because "PRIMARY KEY NONCLUSTERED NOT ENFORCED" and + # "UNIQUE NOT ENFORCED" are invalid in the generic "tsql" dialect. + # They are however valid in the Synapse variant of the dialect. + with pytest.raises(APIParsingError): + sqlfluff.parse(sql, dialect="tsql") + assert f'"col1" bigint {attr} NOT NULL' in sql diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 153504bf4a..b8d2e31e3f 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -387,7 +387,8 @@ def test_get_storage_table_with_all_types(client: SqlJobClientBase) -> None: "time", ): continue - if client.config.destination_type == "mssql" and c["data_type"] in ("complex"): + # mssql and synapse have no native data type for the complex type. 
+ if client.config.destination_type in ("mssql", "synapse") and c["data_type"] in ("complex"): continue assert c["data_type"] == expected_c["data_type"] diff --git a/tests/load/test_sql_client.py b/tests/load/test_sql_client.py index 96f0db09bb..4bdf08e23c 100644 --- a/tests/load/test_sql_client.py +++ b/tests/load/test_sql_client.py @@ -38,7 +38,7 @@ def client(request) -> Iterator[SqlJobClientBase]: @pytest.mark.parametrize( "client", - destinations_configs(default_sql_configs=True, exclude=["mssql"]), + destinations_configs(default_sql_configs=True, exclude=["mssql", "synapse"]), indirect=True, ids=lambda x: x.name, ) @@ -263,9 +263,15 @@ def test_execute_df(client: SqlJobClientBase) -> None: client.update_stored_schema() table_name = prepare_temp_table(client) f_q_table_name = client.sql_client.make_qualified_table_name(table_name) - insert_query = ",".join([f"({idx})" for idx in range(0, total_records)]) - client.sql_client.execute_sql(f"INSERT INTO {f_q_table_name} VALUES {insert_query};") + if client.capabilities.insert_values_writer_type == "default": + insert_query = ",".join([f"({idx})" for idx in range(0, total_records)]) + sql_stmt = f"INSERT INTO {f_q_table_name} VALUES {insert_query};" + elif client.capabilities.insert_values_writer_type == "select_union": + insert_query = " UNION ALL ".join([f"SELECT {idx}" for idx in range(0, total_records)]) + sql_stmt = f"INSERT INTO {f_q_table_name} {insert_query};" + + client.sql_client.execute_sql(sql_stmt) with client.sql_client.execute_query( f"SELECT * FROM {f_q_table_name} ORDER BY col ASC" ) as curr: diff --git a/tests/load/utils.py b/tests/load/utils.py index 6811ca59a6..55445e0b95 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -163,7 +163,7 @@ def destinations_configs( destination_configs += [ DestinationTestConfiguration(destination=destination) for destination in SQL_DESTINATIONS - if destination != "athena" + if destination not in ("athena", "synapse") ] destination_configs += [ DestinationTestConfiguration(destination="duckdb", file_format="parquet") @@ -190,6 +190,10 @@ def destinations_configs( extra_info="iceberg", ) ] + # dbt for Synapse has some complications and I couldn't get it to pass all tests. 
+ destination_configs += [ + DestinationTestConfiguration(destination="synapse", supports_dbt=False) + ] if default_vector_configs: # for now only weaviate @@ -465,7 +469,6 @@ def yield_client_with_storage( ) as client: client.initialize_storage() yield client - # print(dataset_name) client.sql_client.drop_dataset() if isinstance(client, WithStagingDataset): with client.with_staging_dataset(): diff --git a/tests/utils.py b/tests/utils.py index cf172f9733..211f87874d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -45,6 +45,7 @@ "motherduck", "mssql", "qdrant", + "synapse", } NON_SQL_DESTINATIONS = {"filesystem", "weaviate", "dummy", "motherduck", "qdrant"} SQL_DESTINATIONS = IMPLEMENTED_DESTINATIONS - NON_SQL_DESTINATIONS From 05b05305ee46108261789ed25442aec518b1cca6 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 18 Jan 2024 16:44:34 +0100 Subject: [PATCH 02/23] make var type consistent --- dlt/common/data_writers/escape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlt/common/data_writers/escape.py b/dlt/common/data_writers/escape.py index b56a0d8f19..20932fec6c 100644 --- a/dlt/common/data_writers/escape.py +++ b/dlt/common/data_writers/escape.py @@ -101,7 +101,7 @@ def escape_mssql_literal(v: Any) -> Any: # 8000 is the max value for n in VARBINARY(n) # https://learn.microsoft.com/en-us/sql/t-sql/data-types/binary-and-varbinary-transact-sql if len(v) <= 8000: - n = len(v) + n = str(len(v)) else: n = "MAX" return f"CONVERT(VARBINARY({n}), '{v.hex()}', 2)" From dc7619ad6f778b55cefaa09a3d3ef194ae5bc07a Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 18 Jan 2024 17:12:32 +0100 Subject: [PATCH 03/23] simplify client init logic --- dlt/destinations/impl/synapse/synapse.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index 18d1fa81d4..0ad959f7ab 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -12,7 +12,7 @@ from dlt.destinations.insert_job_client import InsertValuesJobClient from dlt.destinations.job_client_impl import SqlJobClientBase -from dlt.destinations.impl.mssql.mssql import MsSqlTypeMapper, MsSqlClient, HINT_TO_MSSQL_ATTR +from dlt.destinations.impl.mssql.mssql import MsSqlClient from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.sql_client import SynapseSqlClient @@ -29,11 +29,11 @@ class SynapseClient(MsSqlClient, SupportsStagingDestination): capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: SynapseClientConfiguration) -> None: - sql_client = SynapseSqlClient(config.normalize_dataset_name(schema), config.credentials) - InsertValuesJobClient.__init__(self, schema, config, sql_client) + super().__init__(schema, config) self.config: SynapseClientConfiguration = config - self.sql_client = sql_client - self.type_mapper = MsSqlTypeMapper(self.capabilities) + self.sql_client = SynapseSqlClient( + config.normalize_dataset_name(schema), config.credentials + ) self.active_hints = deepcopy(HINT_TO_SYNAPSE_ATTR) if not self.config.create_indexes: From 702dd28032fd6a1e36214d34131373afbbed03ba Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sun, 21 Jan 2024 01:34:48 +0100 Subject: [PATCH 04/23] add support for table index type configuration --- dlt/common/data_writers/escape.py | 6 +- dlt/common/destination/reference.py | 4 +- dlt/common/schema/schema.py | 8 + 
dlt/common/schema/typing.py | 3 + dlt/common/schema/utils.py | 12 ++ dlt/destinations/impl/mssql/mssql.py | 2 + .../impl/synapse/configuration.py | 9 +- dlt/destinations/impl/synapse/factory.py | 7 + dlt/destinations/impl/synapse/synapse.py | 96 ++++++++++-- dlt/extract/decorators.py | 7 + dlt/extract/hints.py | 3 + tests/load/pipeline/test_table_indexing.py | 140 ++++++++++++++++++ .../synapse/test_synapse_table_builder.py | 13 +- 13 files changed, 292 insertions(+), 18 deletions(-) create mode 100644 tests/load/pipeline/test_table_indexing.py diff --git a/dlt/common/data_writers/escape.py b/dlt/common/data_writers/escape.py index 20932fec6c..1de584de2e 100644 --- a/dlt/common/data_writers/escape.py +++ b/dlt/common/data_writers/escape.py @@ -98,9 +98,9 @@ def escape_mssql_literal(v: Any) -> Any: json.dumps(v), prefix="N'", escape_dict=MS_SQL_ESCAPE_DICT, escape_re=MS_SQL_ESCAPE_RE ) if isinstance(v, bytes): - # 8000 is the max value for n in VARBINARY(n) - # https://learn.microsoft.com/en-us/sql/t-sql/data-types/binary-and-varbinary-transact-sql - if len(v) <= 8000: + from dlt.destinations.impl.mssql.mssql import VARBINARY_MAX_N + + if len(v) <= VARBINARY_MAX_N: n = str(len(v)) else: n = "MAX" diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 1c28dffa8c..59f13b30b9 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -34,7 +34,7 @@ from dlt.common.schema import Schema, TTableSchema, TSchemaTables from dlt.common.schema.typing import TWriteDisposition from dlt.common.schema.exceptions import InvalidDatasetName -from dlt.common.schema.utils import get_write_disposition, get_table_format +from dlt.common.schema.utils import get_write_disposition, get_table_format, get_table_index_type from dlt.common.configuration import configspec, with_config, resolve_configuration, known_sections from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration from dlt.common.configuration.accessors import config @@ -372,6 +372,8 @@ def get_load_table(self, table_name: str, prepare_for_staging: bool = False) -> table["write_disposition"] = get_write_disposition(self.schema.tables, table_name) if "table_format" not in table: table["table_format"] = get_table_format(self.schema.tables, table_name) + if "table_index_type" not in table: + table["table_index_type"] = get_table_index_type(self.schema.tables, table_name) return table except KeyError: raise UnknownTableException(table_name) diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index e95699b91e..ccfc038085 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -546,12 +546,20 @@ def data_tables(self, include_incomplete: bool = False) -> List[TTableSchema]: ) ] + def data_table_names(self) -> List[str]: + """Returns list of table table names. 
Excludes dlt table names.""" + return [t["name"] for t in self.data_tables()] + def dlt_tables(self) -> List[TTableSchema]: """Gets dlt tables""" return [ t for t in self._schema_tables.values() if t["name"].startswith(self._dlt_tables_prefix) ] + def dlt_table_names(self) -> List[str]: + """Returns list of dlt table names.""" + return [t["name"] for t in self.dlt_tables()] + def get_preferred_type(self, col_name: str) -> Optional[TDataType]: return next((m[1] for m in self._compiled_preferred_types if m[0].search(col_name)), None) diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index 9a27cbe4bb..351d666553 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -62,6 +62,8 @@ """Known hints of a column used to declare hint regexes.""" TWriteDisposition = Literal["skip", "append", "replace", "merge"] TTableFormat = Literal["iceberg"] +TTableIndexType = Literal["heap", "clustered_columnstore_index"] +"Table index type. Currently only used for Synapse destination." TTypeDetections = Literal[ "timestamp", "iso_timestamp", "iso_date", "large_integer", "hexbytes_to_text", "wei_to_double" ] @@ -165,6 +167,7 @@ class TTableSchema(TypedDict, total=False): columns: TTableSchemaColumns resource: Optional[str] table_format: Optional[TTableFormat] + table_index_type: Optional[TTableIndexType] class TPartialTableSchema(TTableSchema): diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index dc243f50dd..5ea244148e 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -32,6 +32,7 @@ TColumnSchema, TColumnProp, TTableFormat, + TTableIndexType, TColumnHint, TTypeDetectionFunc, TTypeDetections, @@ -618,6 +619,14 @@ def get_table_format(tables: TSchemaTables, table_name: str) -> TTableFormat: ) +def get_table_index_type(tables: TSchemaTables, table_name: str) -> TTableIndexType: + """Returns table index type of a table if present. 
If not, looks up into parent table.""" + return cast( + TTableIndexType, + get_inherited_table_hint(tables, table_name, "table_index_type", allow_none=True), + ) + + def table_schema_has_type(table: TTableSchema, _typ: TDataType) -> bool: """Checks if `table` schema contains column with type _typ""" return any(c.get("data_type") == _typ for c in table["columns"].values()) @@ -724,6 +733,7 @@ def new_table( resource: str = None, schema_contract: TSchemaContract = None, table_format: TTableFormat = None, + table_index_type: TTableIndexType = None, ) -> TTableSchema: table: TTableSchema = { "name": table_name, @@ -742,6 +752,8 @@ def new_table( table["schema_contract"] = schema_contract if table_format: table["table_format"] = table_format + if table_index_type is not None: + table["table_index_type"] = table_index_type if validate_schema: validate_dict_ignoring_xkeys( spec=TColumnSchema, diff --git a/dlt/destinations/impl/mssql/mssql.py b/dlt/destinations/impl/mssql/mssql.py index e97389f185..b6af345e36 100644 --- a/dlt/destinations/impl/mssql/mssql.py +++ b/dlt/destinations/impl/mssql/mssql.py @@ -20,6 +20,8 @@ HINT_TO_MSSQL_ATTR: Dict[TColumnHint, str] = {"unique": "UNIQUE"} +VARCHAR_MAX_N: int = 4000 +VARBINARY_MAX_N: int = 8000 class MsSqlTypeMapper(TypeMapper): diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py index 0596cc2c46..966997b5a2 100644 --- a/dlt/destinations/impl/synapse/configuration.py +++ b/dlt/destinations/impl/synapse/configuration.py @@ -1,6 +1,7 @@ from typing import Final, Any, List, Dict, Optional, ClassVar from dlt.common.configuration import configspec +from dlt.common.schema.typing import TTableIndexType from dlt.destinations.impl.mssql.configuration import ( MsSqlCredentials, @@ -30,9 +31,15 @@ class SynapseClientConfiguration(MsSqlClientConfiguration): destination_type: Final[str] = "synapse" # type: ignore credentials: SynapseCredentials + # While Synapse uses CLUSTERED COLUMNSTORE INDEX tables by default, we use + # HEAP tables (no indexing) by default. HEAP is a more robust choice, because + # columnstore tables do not support varchar(max), nvarchar(max), and varbinary(max). + # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index + default_table_index_type: Optional[TTableIndexType] = "heap" + # Determines if `primary_key` and `unique` column hints are applied. # Set to False by default because the PRIMARY KEY and UNIQUE constraints # are tricky in Synapse: they are NOT ENFORCED and can lead to innacurate # results if the user does not ensure all column values are unique. 
# https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints - create_indexes: bool = False + create_indexes: Optional[bool] = False diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py index fa7facc0ca..6bdf2946b6 100644 --- a/dlt/destinations/impl/synapse/factory.py +++ b/dlt/destinations/impl/synapse/factory.py @@ -1,6 +1,7 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.common.schema.typing import TTableIndexType from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.configuration import ( @@ -27,6 +28,7 @@ def client_class(self) -> t.Type["SynapseClient"]: def __init__( self, credentials: t.Union[SynapseCredentials, t.Dict[str, t.Any], str] = None, + default_table_index_type: t.Optional[TTableIndexType] = "heap", create_indexes: bool = False, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, @@ -39,11 +41,16 @@ def __init__( Args: credentials: Credentials to connect to the Synapse dedicated pool. Can be an instance of `SynapseCredentials` or a connection string in the format `synapse://user:password@host:port/database` + default_table_index_type: Table index type that is used if no + table index type is specified on the resource. This setting only + applies to data tables, dlt system tables are not affected + (they always have "heap" as table index type). create_indexes: Should unique indexes be created, defaults to False **kwargs: Additional arguments passed to the destination config """ super().__init__( credentials=credentials, + default_table_index_type=default_table_index_type, create_indexes=create_indexes, destination_name=destination_name, environment=environment, diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index 0ad959f7ab..e01e851d83 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -1,18 +1,24 @@ -from typing import ClassVar, Sequence, List, Dict, Any, Optional +from typing import ClassVar, Sequence, List, Dict, Any, Optional, cast from copy import deepcopy +from textwrap import dedent from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import SupportsStagingDestination, NewLoadJob from dlt.common.schema import TTableSchema, TColumnSchema, Schema, TColumnHint -from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.schema.typing import TTableSchemaColumns, TTableIndexType from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlJobParams from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.insert_job_client import InsertValuesJobClient from dlt.destinations.job_client_impl import SqlJobClientBase -from dlt.destinations.impl.mssql.mssql import MsSqlClient +from dlt.destinations.impl.mssql.mssql import ( + MsSqlTypeMapper, + MsSqlClient, + VARCHAR_MAX_N, + VARBINARY_MAX_N, +) from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.sql_client import SynapseSqlClient @@ -23,9 +29,13 @@ "primary_key": "PRIMARY KEY NONCLUSTERED NOT ENFORCED", "unique": "UNIQUE NOT ENFORCED", } +TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR: Dict[TTableIndexType, str] = { + "heap": "HEAP", + "clustered_columnstore_index": "CLUSTERED COLUMNSTORE INDEX", +} -class SynapseClient(MsSqlClient, SupportsStagingDestination): +class SynapseClient(MsSqlClient): 
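For context on how the factory arguments above surface to users, here is a minimal sketch of creating a pipeline with the Synapse factory. The pipeline name and connection string are placeholders, and it assumes the factory is exported from `dlt.destinations` as the change to `dlt/destinations/__init__.py` suggests:

```python
# Sketch only; credentials and names are made up.
import dlt
from dlt.destinations import synapse

pipeline = dlt.pipeline(
    pipeline_name="synapse_demo",  # hypothetical name
    destination=synapse(
        credentials="synapse://user:password@host:port/database",  # placeholder
        default_table_index_type="heap",  # dlt system tables are always "heap"
        create_indexes=False,             # PRIMARY KEY / UNIQUE hints are not applied
    ),
)
```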
capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: SynapseClientConfiguration) -> None: @@ -43,20 +53,54 @@ def __init__(self, schema: Schema, config: SynapseClientConfiguration) -> None: def _get_table_update_sql( self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool ) -> List[str]: + table = self.get_load_table(table_name) + if table is None: + table_index_type = self.config.default_table_index_type + else: + table_index_type = table.get("table_index_type") + if table_index_type == "clustered_columnstore_index": + new_columns = self._get_columstore_valid_columns(new_columns) + _sql_result = SqlJobClientBase._get_table_update_sql( self, table_name, new_columns, generate_alter ) if not generate_alter: - # Append WITH clause to create heap table instead of default - # columnstore table. Heap tables are a more robust choice, because - # columnstore tables do not support varchar(max), nvarchar(max), - # and varbinary(max). - # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index - sql_result = [_sql_result[0] + "\n WITH ( HEAP );"] + table_index_type_attr = TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR[table_index_type] + sql_result = [_sql_result[0] + f"\n WITH ( {table_index_type_attr} );"] else: sql_result = _sql_result return sql_result + def _get_columstore_valid_columns( + self, columns: Sequence[TColumnSchema] + ) -> Sequence[TColumnSchema]: + return [self._get_columstore_valid_column(c) for c in columns] + + def _get_columstore_valid_column(self, c: TColumnSchema) -> TColumnSchema: + """ + Returns TColumnSchema that maps to a Synapse data type that can participate in a columnstore index. + + varchar(max), nvarchar(max), and varbinary(max) are replaced with + varchar(n), nvarchar(n), and varbinary(n), respectively, where + n equals the user-specified precision, or the maximum allowed + value if the user did not specify a precision. + """ + varchar_source_types = [ + sct + for sct, dbt in MsSqlTypeMapper.sct_to_unbound_dbt.items() + if dbt in ("varchar(max)", "nvarchar(max)") + ] + varbinary_source_types = [ + sct + for sct, dbt in MsSqlTypeMapper.sct_to_unbound_dbt.items() + if dbt == "varbinary(max)" + ] + if c["data_type"] in varchar_source_types and "precision" not in c: + return {**c, **{"precision": VARCHAR_MAX_N}} + elif c["data_type"] in varbinary_source_types and "precision" not in c: + return {**c, **{"precision": VARBINARY_MAX_N}} + return c + def _create_replace_followup_jobs( self, table_chain: Sequence[TTableSchema] ) -> List[NewLoadJob]: @@ -64,6 +108,38 @@ def _create_replace_followup_jobs( return [SynapseStagingCopyJob.from_table_chain(table_chain, self.sql_client)] return super()._create_replace_followup_jobs(table_chain) + def get_load_table(self, table_name: str, staging: bool = False) -> TTableSchema: + table = super().get_load_table(table_name, staging) + if table is None: + return None + if table_name in self.schema.dlt_table_names(): + # dlt tables should always be heap tables, regardless of the user + # configuration. Why? "For small lookup tables, less than 60 million rows, + # consider using HEAP or clustered index for faster query performance." 
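To make the effect of `_get_table_update_sql` above concrete, here is an illustrative snippet showing how the index-type attribute is appended to the generated CREATE TABLE statement. The table name and columns are made up and the base DDL is simplified:

```python
# Simplified illustration of the WITH clause appended for non-ALTER statements.
TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR = {
    "heap": "HEAP",
    "clustered_columnstore_index": "CLUSTERED COLUMNSTORE INDEX",
}

base_ddl = 'CREATE TABLE "my_dataset"."items" ("id" bigint, "value" nvarchar(4000))'
table_index_type = "clustered_columnstore_index"
print(base_ddl + f"\n WITH ( {TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR[table_index_type]} );")
# CREATE TABLE "my_dataset"."items" ("id" bigint, "value" nvarchar(4000))
#  WITH ( CLUSTERED COLUMNSTORE INDEX );
```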
+ # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables + table["table_index_type"] = "heap" + if table["table_index_type"] is None: + table["table_index_type"] = self.config.default_table_index_type + return table + + def get_storage_table_index_type(self, table_name: str) -> TTableIndexType: + """Returns table index type of table in storage destination.""" + with self.sql_client as sql_client: + schema_name = sql_client.fully_qualified_dataset_name(escape=False) + sql = dedent(f""" + SELECT + CASE i.type_desc + WHEN 'HEAP' THEN 'heap' + WHEN 'CLUSTERED COLUMNSTORE' THEN 'clustered_columnstore_index' + END AS table_index_type + FROM sys.indexes i + INNER JOIN sys.tables t ON t.object_id = i.object_id + INNER JOIN sys.schemas s ON s.schema_id = t.schema_id + WHERE s.name = '{schema_name}' AND t.name = '{table_name}' + """) + table_index_type = sql_client.execute_sql(sql)[0][0] + return cast(TTableIndexType, table_index_type) + class SynapseStagingCopyJob(SqlStagingCopyJob): @classmethod diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index cf7426e683..573d3d3ad0 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -36,6 +36,7 @@ TAnySchemaColumns, TSchemaContract, TTableFormat, + TTableIndexType, ) from dlt.extract.utils import ( ensure_table_schema_columns_hint, @@ -256,6 +257,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, ) -> DltResource: ... @@ -273,6 +275,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, ) -> Callable[[Callable[TResourceFunParams, Any]], DltResource]: ... @@ -290,6 +293,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, standalone: Literal[True] = True, @@ -308,6 +312,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, ) -> DltResource: ... 
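With the `table_index_type` argument added to `dlt.resource` above, the hint can be declared directly on a resource. A minimal sketch with made-up data follows; note that a later commit in this series replaces this argument with a Synapse-specific destination adapter:

```python
# Sketch of the resource-level hint as introduced in this commit.
import dlt

@dlt.resource(write_disposition="append", table_index_type="clustered_columnstore_index")
def items():
    yield {"id": 1, "value": "foo"}
```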
@@ -324,6 +329,7 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, standalone: bool = False, @@ -403,6 +409,7 @@ def make_resource( merge_key=merge_key, schema_contract=schema_contract, table_format=table_format, + table_index_type=table_index_type, ) return DltResource.from_data( _data, diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index 437dbbc6bd..36354eb0da 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -12,6 +12,7 @@ TWriteDisposition, TAnySchemaColumns, TTableFormat, + TTableIndexType, TSchemaContract, ) from dlt.common.typing import TDataItem @@ -274,6 +275,7 @@ def new_table_template( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, + table_index_type: TTableHintTemplate[TTableIndexType] = None, ) -> TResourceHints: validator, schema_contract = create_item_validator(columns, schema_contract) clean_columns = columns @@ -289,6 +291,7 @@ def new_table_template( columns=clean_columns, # type: ignore schema_contract=schema_contract, # type: ignore table_format=table_format, # type: ignore + table_index_type=table_index_type, # type: ignore ) if not table_name: new_template.pop("name") diff --git a/tests/load/pipeline/test_table_indexing.py b/tests/load/pipeline/test_table_indexing.py new file mode 100644 index 0000000000..5f62cddfee --- /dev/null +++ b/tests/load/pipeline/test_table_indexing.py @@ -0,0 +1,140 @@ +import os +import pytest +from typing import Iterator, List, Any, Union +from textwrap import dedent + +import dlt +from dlt.common.schema import TColumnSchema +from dlt.common.schema.typing import TTableIndexType, TSchemaTables +from dlt.common.schema.utils import get_table_index_type + +from dlt.destinations.sql_client import SqlClientBase + +from tests.load.utils import TABLE_UPDATE, TABLE_ROW_ALL_DATA_TYPES +from tests.load.pipeline.utils import ( + destinations_configs, + DestinationTestConfiguration, +) + + +TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID = [ + ("heap", None), + # For "clustered_columnstore_index" tables, different code paths exist + # when no column schema is specified versus when a column schema is + # specified, so we test both. + ("clustered_columnstore_index", None), + ("clustered_columnstore_index", TABLE_UPDATE), +] + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["synapse"]), + ids=lambda x: x.name, +) +@pytest.mark.parametrize( + "table_index_type,column_schema", TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID +) +def test_default_table_index_type_configuration( + destination_config: DestinationTestConfiguration, + table_index_type: TTableIndexType, + column_schema: Union[List[TColumnSchema], None], +) -> None: + # Configure default_table_index_type. 
+ os.environ["DESTINATION__SYNAPSE__DEFAULT_TABLE_INDEX_TYPE"] = table_index_type + + @dlt.resource( + name="items_without_table_index_type_specified", + write_disposition="append", + columns=column_schema, + ) + def items_without_table_index_type_specified() -> Iterator[Any]: + yield TABLE_ROW_ALL_DATA_TYPES + + pipeline = destination_config.setup_pipeline( + f"test_default_table_index_type_{table_index_type}", + full_refresh=True, + ) + job_client = pipeline.destination_client() + # Assert configuration value gets properly propagated to job client configuration. + assert job_client.config.default_table_index_type == table_index_type # type: ignore[attr-defined] + + # Run the pipeline and create the tables. + pipeline.run(items_without_table_index_type_specified) + + # For all tables, assert the applied index type equals the expected index type. + # Child tables, if any, inherit the index type of their parent. + tables = pipeline.default_schema.tables + for table_name in tables: + applied_table_index_type = job_client.get_storage_table_index_type(table_name) # type: ignore[attr-defined] + if table_name in pipeline.default_schema.data_table_names(): + # For data tables, the applied table index type should be the default value. + assert applied_table_index_type == job_client.config.default_table_index_type # type: ignore[attr-defined] + elif table_name in pipeline.default_schema.dlt_table_names(): + # For dlt tables, the applied table index type should always be "heap". + assert applied_table_index_type == "heap" + + # Test overriding the default_table_index_type from a resource configuration. + if job_client.config.default_table_index_type == "heap": # type: ignore[attr-defined] + + @dlt.resource( + name="items_with_table_index_type_specified", + write_disposition="append", + table_index_type="clustered_columnstore_index", + columns=column_schema, + ) + def items_with_table_index_type_specified() -> Iterator[Any]: + yield TABLE_ROW_ALL_DATA_TYPES + + pipeline.run(items_with_table_index_type_specified) + applied_table_index_type = job_client.get_storage_table_index_type( # type: ignore[attr-defined] + "items_with_table_index_type_specified" + ) + # While the default is "heap", the applied index type should be "clustered_columnstore_index" + # because it was provided as argument to the resource. + assert applied_table_index_type == "clustered_columnstore_index" + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["synapse"]), + ids=lambda x: x.name, +) +@pytest.mark.parametrize( + "table_index_type,column_schema", TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID +) +def test_resource_table_index_type_configuration( + destination_config: DestinationTestConfiguration, + table_index_type: TTableIndexType, + column_schema: Union[List[TColumnSchema], None], +) -> None: + @dlt.resource( + name="items_with_table_index_type_specified", + write_disposition="append", + table_index_type=table_index_type, + columns=column_schema, + ) + def items_with_table_index_type_specified() -> Iterator[Any]: + yield TABLE_ROW_ALL_DATA_TYPES + + pipeline = destination_config.setup_pipeline( + f"test_table_index_type_{table_index_type}", + full_refresh=True, + ) + + # Run the pipeline and create the tables. + pipeline.run(items_with_table_index_type_specified) + + # For all tables, assert the applied index type equals the expected index type. + # Child tables, if any, inherit the index type of their parent. 
+ job_client = pipeline.destination_client() + tables = pipeline.default_schema.tables + for table_name in tables: + applied_table_index_type = job_client.get_storage_table_index_type(table_name) # type: ignore[attr-defined] + if table_name in pipeline.default_schema.data_table_names(): + # For data tables, the applied table index type should be the type + # configured in the resource. + assert applied_table_index_type == table_index_type + elif table_name in pipeline.default_schema.dlt_table_names(): + # For dlt tables, the applied table index type should always be "heap". + assert applied_table_index_type == "heap" diff --git a/tests/load/synapse/test_synapse_table_builder.py b/tests/load/synapse/test_synapse_table_builder.py index f58a7d5883..4719a8d003 100644 --- a/tests/load/synapse/test_synapse_table_builder.py +++ b/tests/load/synapse/test_synapse_table_builder.py @@ -14,7 +14,10 @@ ) from tests.load.utils import TABLE_UPDATE -from dlt.destinations.impl.synapse.synapse import HINT_TO_SYNAPSE_ATTR +from dlt.destinations.impl.synapse.synapse import ( + HINT_TO_SYNAPSE_ATTR, + TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR, +) @pytest.fixture @@ -70,7 +73,9 @@ def test_create_table(client: SynapseClient) -> None: assert '"col6_precision" decimal(6,2) NOT NULL' in sql assert '"col7_precision" varbinary(19)' in sql assert '"col11_precision" time(3) NOT NULL' in sql - assert "WITH ( HEAP )" in sql + table_index_type = client.config.default_table_index_type + table_index_type_attr = TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR[table_index_type] + assert f"WITH ( {table_index_type_attr} )" in sql def test_alter_table(client: SynapseClient) -> None: @@ -97,7 +102,9 @@ def test_alter_table(client: SynapseClient) -> None: assert '"col6_precision" decimal(6,2) NOT NULL' in sql assert '"col7_precision" varbinary(19)' in sql assert '"col11_precision" time(3) NOT NULL' in sql - assert "WITH ( HEAP )" not in sql + table_index_type = client.config.default_table_index_type + table_index_type_attr = TABLE_INDEX_TYPE_TO_SYNAPSE_ATTR[table_index_type] + assert f"WITH ( {table_index_type_attr} )" not in sql @pytest.mark.parametrize("hint", ["primary_key", "unique"]) From db73162fef46c98c73ea00daba686d53211c6f81 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 23 Jan 2024 14:59:10 +0100 Subject: [PATCH 05/23] add load concurrency handling and warning --- .../impl/synapse/configuration.py | 61 ++++++++++++++++++- dlt/destinations/impl/synapse/factory.py | 10 +-- dlt/pipeline/pipeline.py | 9 ++- .../load/pipeline/test_replace_disposition.py | 10 ++- 4 files changed, 76 insertions(+), 14 deletions(-) diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py index 966997b5a2..b5eec82e9e 100644 --- a/dlt/destinations/impl/synapse/configuration.py +++ b/dlt/destinations/impl/synapse/configuration.py @@ -1,7 +1,8 @@ from typing import Final, Any, List, Dict, Optional, ClassVar from dlt.common.configuration import configspec -from dlt.common.schema.typing import TTableIndexType +from dlt.common.schema.typing import TTableIndexType, TWriteDisposition +from dlt.common import logger from dlt.destinations.impl.mssql.configuration import ( MsSqlCredentials, @@ -36,10 +37,66 @@ class SynapseClientConfiguration(MsSqlClientConfiguration): # columnstore tables do not support varchar(max), nvarchar(max), and varbinary(max). 
# https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index default_table_index_type: Optional[TTableIndexType] = "heap" + """ + Table index type that is used if no table index type is specified on the resource. + This only affects data tables, dlt system tables ignore this setting and + are always created as "heap" tables. + """ - # Determines if `primary_key` and `unique` column hints are applied. # Set to False by default because the PRIMARY KEY and UNIQUE constraints # are tricky in Synapse: they are NOT ENFORCED and can lead to innacurate # results if the user does not ensure all column values are unique. # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints create_indexes: Optional[bool] = False + """Whether `primary_key` and `unique` column hints are applied.""" + + # Concurrency is disabled by overriding the configured number of workers to 1 at runtime. + auto_disable_concurrency: Optional[bool] = True + """Whether concurrency is automatically disabled in cases where it might cause issues.""" + + __config_gen_annotations__: ClassVar[List[str]] = [ + "default_table_index_type", + "create_indexes", + "auto_disable_concurrency", + ] + + def get_load_workers(self, write_disposition: TWriteDisposition, workers: int) -> int: + if ( + write_disposition == "replace" + and self.replace_strategy == "staging-optimized" + and workers > 1 + ): + print("auto_disable_concurrency:", self.auto_disable_concurrency) + warning_msg_shared = ( + 'Data is being loaded into Synapse with write disposition "replace"' + ' and replace strategy "staging-optimized", while the number of' + f" load workers ({workers}) > 1. This configuration is problematic" + " in some cases, because Synapse does not always handle concurrency well" + " with the CTAS queries that are used behind the scenes to implement" + ' the "staging-optimized" strategy.' + ) + if self.auto_disable_concurrency: + logger.warning( + warning_msg_shared + + " The number of load workers will be automatically adjusted" + " and set to 1 to eliminate concurrency and prevent potential" + " issues. 
If you don't want this to happen, set the" + " DESTINATION__SYNAPSE__AUTO_DISABLE_CONCURRENCY environment" + ' variable to "false", or add the following to your config TOML:' + "\n\n[destination.synapse]\nauto_disable_concurrency = false\n" + ) + workers = 1 # adjust workers + else: + logger.warning( + warning_msg_shared + + " If you experience your pipeline gets stuck and doesn't finish," + " try reducing the number of load workers by exporting the LOAD__WORKERS" + " environment variable or by setting it in your config TOML:" + "\n\n[load]\nworkers = 1 # a value of 1 disables all concurrency," + " but perhaps a higher value also works\n\n" + "Alternatively, you can set the DESTINATION__SYNAPSE__AUTO_DISABLE_CONCURRENCY" + ' environment variable to "true", or add the following to your config TOML' + " to automatically disable concurrency where needed:" + "\n\n[destination.synapse]\nauto_disable_concurrency = true\n" + ) + return workers diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py index 6bdf2946b6..f77d8c11c2 100644 --- a/dlt/destinations/impl/synapse/factory.py +++ b/dlt/destinations/impl/synapse/factory.py @@ -30,6 +30,7 @@ def __init__( credentials: t.Union[SynapseCredentials, t.Dict[str, t.Any], str] = None, default_table_index_type: t.Optional[TTableIndexType] = "heap", create_indexes: bool = False, + auto_disable_concurrency: t.Optional[bool] = True, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -41,17 +42,16 @@ def __init__( Args: credentials: Credentials to connect to the Synapse dedicated pool. Can be an instance of `SynapseCredentials` or a connection string in the format `synapse://user:password@host:port/database` - default_table_index_type: Table index type that is used if no - table index type is specified on the resource. This setting only - applies to data tables, dlt system tables are not affected - (they always have "heap" as table index type). - create_indexes: Should unique indexes be created, defaults to False + default_table_index_type: Maps directly to the default_table_index_type attribute of the SynapseClientConfiguration object. + create_indexes: Maps directly to the create_indexes attribute of the SynapseClientConfiguration object. + auto_disable_concurrency: Maps directly to the auto_disable_concurrency attribute of the SynapseClientConfiguration object. 
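The warning messages above already quote the relevant settings; for reference, a small sketch of the two knobs set via environment variables, mirroring how the tests configure them (the values shown are illustrative):

```python
import os

# Keep concurrency and accept the trade-off described in the warning above...
os.environ["DESTINATION__SYNAPSE__AUTO_DISABLE_CONCURRENCY"] = "false"

# ...or explicitly disable load concurrency instead.
os.environ["LOAD__WORKERS"] = "1"
```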
**kwargs: Additional arguments passed to the destination config """ super().__init__( credentials=credentials, default_table_index_type=default_table_index_type, create_indexes=create_indexes, + auto_disable_concurrency=auto_disable_concurrency, destination_name=destination_name, environment=environment, **kwargs, diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 73c8f076d1..44a2cbdfdb 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -45,7 +45,7 @@ TAnySchemaColumns, TSchemaContract, ) -from dlt.common.schema.utils import normalize_schema_name +from dlt.common.schema.utils import normalize_schema_name, get_write_disposition from dlt.common.storages.exceptions import LoadPackageNotFound from dlt.common.typing import DictStrStr, TFun, TSecretValue, is_optional_type from dlt.common.runners import pool_runner as runner @@ -483,6 +483,13 @@ def load( # make sure that destination is set and client is importable and can be instantiated client, staging_client = self._get_destination_clients(self.default_schema) + # for synapse we might need to adjust the number of load workers + if self.destination.destination_name == "synapse": + write_disposition = get_write_disposition( + self.default_schema.tables, self.default_schema.data_table_names()[0] + ) + workers = client.config.get_load_workers(write_disposition, workers) # type: ignore[attr-defined] + # create default loader config and the loader load_config = LoaderConfiguration( workers=workers, diff --git a/tests/load/pipeline/test_replace_disposition.py b/tests/load/pipeline/test_replace_disposition.py index 1dde56a6b1..65d3646f2d 100644 --- a/tests/load/pipeline/test_replace_disposition.py +++ b/tests/load/pipeline/test_replace_disposition.py @@ -264,16 +264,14 @@ def test_replace_table_clearing( # use staging tables for replace os.environ["DESTINATION__REPLACE_STRATEGY"] = replace_strategy - if destination_config.destination == "synapse" and replace_strategy == "staging-optimized": - # The "staging-optimized" replace strategy makes Synapse suspend the CTAS - # queries used to recreate the staging table, and hang, when the number - # of load workers is greater than 1. 
- os.environ["LOAD__WORKERS"] = "1" - pipeline = destination_config.setup_pipeline( "test_replace_table_clearing", dataset_name="test_replace_table_clearing", full_refresh=True ) + if destination_config.destination == "synapse" and replace_strategy == "staging-optimized": + # this case requires load concurrency to be disabled (else the test gets stuck) + assert pipeline.destination_client().config.auto_disable_concurrency is True # type: ignore[attr-defined] + @dlt.resource(name="main_resource", write_disposition="replace", primary_key="id") def items_with_subitems(): data = { From 75be2ce54ccb486679ca1b177551c3097a2f3908 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 23 Jan 2024 20:26:06 +0100 Subject: [PATCH 06/23] rewrite naive code to prevent IndexError --- dlt/destinations/impl/synapse/configuration.py | 14 +++++++++----- dlt/pipeline/pipeline.py | 7 ++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py index b5eec82e9e..119c55ad7a 100644 --- a/dlt/destinations/impl/synapse/configuration.py +++ b/dlt/destinations/impl/synapse/configuration.py @@ -1,8 +1,9 @@ from typing import Final, Any, List, Dict, Optional, ClassVar -from dlt.common.configuration import configspec -from dlt.common.schema.typing import TTableIndexType, TWriteDisposition from dlt.common import logger +from dlt.common.configuration import configspec +from dlt.common.schema.typing import TTableIndexType, TSchemaTables +from dlt.common.schema.utils import get_write_disposition from dlt.destinations.impl.mssql.configuration import ( MsSqlCredentials, @@ -60,13 +61,16 @@ class SynapseClientConfiguration(MsSqlClientConfiguration): "auto_disable_concurrency", ] - def get_load_workers(self, write_disposition: TWriteDisposition, workers: int) -> int: + def get_load_workers(self, tables: TSchemaTables, workers: int) -> int: + """Returns the adjusted number of load workers to prevent concurrency issues.""" + + write_dispositions = [get_write_disposition(tables, table_name) for table_name in tables] + n_replace_dispositions = len([d for d in write_dispositions if d == "replace"]) if ( - write_disposition == "replace" + n_replace_dispositions > 1 and self.replace_strategy == "staging-optimized" and workers > 1 ): - print("auto_disable_concurrency:", self.auto_disable_concurrency) warning_msg_shared = ( 'Data is being loaded into Synapse with write disposition "replace"' ' and replace strategy "staging-optimized", while the number of' diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 44a2cbdfdb..3a0a8f3931 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -45,7 +45,7 @@ TAnySchemaColumns, TSchemaContract, ) -from dlt.common.schema.utils import normalize_schema_name, get_write_disposition +from dlt.common.schema.utils import normalize_schema_name from dlt.common.storages.exceptions import LoadPackageNotFound from dlt.common.typing import DictStrStr, TFun, TSecretValue, is_optional_type from dlt.common.runners import pool_runner as runner @@ -485,10 +485,7 @@ def load( # for synapse we might need to adjust the number of load workers if self.destination.destination_name == "synapse": - write_disposition = get_write_disposition( - self.default_schema.tables, self.default_schema.data_table_names()[0] - ) - workers = client.config.get_load_workers(write_disposition, workers) # type: ignore[attr-defined] + workers = 
client.config.get_load_workers(self.default_schema.tables, workers) # type: ignore[attr-defined] # create default loader config and the loader load_config = LoaderConfiguration( From 014543aa5adb7669adead1cbda39cb21268c9070 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 25 Jan 2024 19:56:21 +0100 Subject: [PATCH 07/23] add support for staged Parquet loading --- dlt/destinations/impl/synapse/__init__.py | 4 +- .../impl/synapse/configuration.py | 8 +- dlt/destinations/impl/synapse/factory.py | 5 +- dlt/destinations/impl/synapse/synapse.py | 115 +++++++++++++++++- poetry.lock | 4 +- pyproject.toml | 2 +- tests/load/pipeline/test_pipelines.py | 17 +-- tests/load/pipeline/test_stage_loading.py | 35 +++++- tests/load/utils.py | 17 +++ 9 files changed, 182 insertions(+), 25 deletions(-) diff --git a/dlt/destinations/impl/synapse/__init__.py b/dlt/destinations/impl/synapse/__init__.py index 175b011186..639d8a598f 100644 --- a/dlt/destinations/impl/synapse/__init__.py +++ b/dlt/destinations/impl/synapse/__init__.py @@ -9,8 +9,8 @@ def capabilities() -> DestinationCapabilitiesContext: caps.preferred_loader_file_format = "insert_values" caps.supported_loader_file_formats = ["insert_values"] - caps.preferred_staging_file_format = None - caps.supported_staging_file_formats = [] + caps.preferred_staging_file_format = "parquet" + caps.supported_staging_file_formats = ["parquet"] caps.insert_values_writer_type = "select_union" # https://stackoverflow.com/a/77014299 diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py index 119c55ad7a..34b227a2ac 100644 --- a/dlt/destinations/impl/synapse/configuration.py +++ b/dlt/destinations/impl/synapse/configuration.py @@ -48,17 +48,21 @@ class SynapseClientConfiguration(MsSqlClientConfiguration): # are tricky in Synapse: they are NOT ENFORCED and can lead to innacurate # results if the user does not ensure all column values are unique. # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints - create_indexes: Optional[bool] = False + create_indexes: bool = False """Whether `primary_key` and `unique` column hints are applied.""" # Concurrency is disabled by overriding the configured number of workers to 1 at runtime. 
- auto_disable_concurrency: Optional[bool] = True + auto_disable_concurrency: bool = True """Whether concurrency is automatically disabled in cases where it might cause issues.""" + staging_use_msi: bool = False + """Whether the managed identity of the Synapse workspace is used to authorize access to the staging Storage Account.""" + __config_gen_annotations__: ClassVar[List[str]] = [ "default_table_index_type", "create_indexes", "auto_disable_concurrency", + "staging_use_msi", ] def get_load_workers(self, tables: TSchemaTables, workers: int) -> int: diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py index f77d8c11c2..3d951f3d4a 100644 --- a/dlt/destinations/impl/synapse/factory.py +++ b/dlt/destinations/impl/synapse/factory.py @@ -30,7 +30,8 @@ def __init__( credentials: t.Union[SynapseCredentials, t.Dict[str, t.Any], str] = None, default_table_index_type: t.Optional[TTableIndexType] = "heap", create_indexes: bool = False, - auto_disable_concurrency: t.Optional[bool] = True, + auto_disable_concurrency: bool = True, + staging_use_msi: bool = False, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, **kwargs: t.Any, @@ -45,6 +46,7 @@ def __init__( default_table_index_type: Maps directly to the default_table_index_type attribute of the SynapseClientConfiguration object. create_indexes: Maps directly to the create_indexes attribute of the SynapseClientConfiguration object. auto_disable_concurrency: Maps directly to the auto_disable_concurrency attribute of the SynapseClientConfiguration object. + auto_disable_concurrency: Maps directly to the staging_use_msi attribute of the SynapseClientConfiguration object. **kwargs: Additional arguments passed to the destination config """ super().__init__( @@ -52,6 +54,7 @@ def __init__( default_table_index_type=default_table_index_type, create_indexes=create_indexes, auto_disable_concurrency=auto_disable_concurrency, + staging_use_msi=staging_use_msi, destination_name=destination_name, environment=environment, **kwargs, diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index e01e851d83..c29c0df3f5 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -1,17 +1,28 @@ +import os from typing import ClassVar, Sequence, List, Dict, Any, Optional, cast from copy import deepcopy from textwrap import dedent +from urllib.parse import urlparse, urlunparse from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import SupportsStagingDestination, NewLoadJob +from dlt.common.destination.reference import ( + SupportsStagingDestination, + NewLoadJob, + CredentialsConfiguration, +) from dlt.common.schema import TTableSchema, TColumnSchema, Schema, TColumnHint +from dlt.common.schema.utils import table_schema_has_type from dlt.common.schema.typing import TTableSchemaColumns, TTableIndexType +from dlt.common.configuration.specs import AzureCredentialsWithoutDefaults + +from dlt.destinations.job_impl import NewReferenceJob from dlt.destinations.sql_jobs import SqlStagingCopyJob, SqlJobParams from dlt.destinations.sql_client import SqlClientBase from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.job_client_impl import SqlJobClientBase +from dlt.destinations.job_client_impl import SqlJobClientBase, LoadJob, CopyRemoteFileLoadJob +from dlt.destinations.exceptions import LoadJobTerminalException from 
dlt.destinations.impl.mssql.mssql import ( MsSqlTypeMapper, @@ -35,7 +46,7 @@ } -class SynapseClient(MsSqlClient): +class SynapseClient(MsSqlClient, SupportsStagingDestination): capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() def __init__(self, schema: Schema, config: SynapseClientConfiguration) -> None: @@ -140,6 +151,21 @@ def get_storage_table_index_type(self, table_name: str) -> TTableIndexType: table_index_type = sql_client.execute_sql(sql)[0][0] return cast(TTableIndexType, table_index_type) + def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: + job = super().start_file_load(table, file_path, load_id) + if not job: + assert NewReferenceJob.is_reference_job( + file_path + ), "Synapse must use staging to load files" + job = SynapseCopyFileLoadJob( + table, + file_path, + self.sql_client, + cast(AzureCredentialsWithoutDefaults, self.config.staging_config.credentials), + self.config.staging_use_msi, + ) + return job + class SynapseStagingCopyJob(SqlStagingCopyJob): @classmethod @@ -173,3 +199,86 @@ def generate_sql( ) return sql + + +class SynapseCopyFileLoadJob(CopyRemoteFileLoadJob): + def __init__( + self, + table: TTableSchema, + file_path: str, + sql_client: SqlClientBase[Any], + staging_credentials: Optional[AzureCredentialsWithoutDefaults] = None, + staging_use_msi: bool = False, + ) -> None: + self.staging_use_msi = staging_use_msi + super().__init__(table, file_path, sql_client, staging_credentials) + + def execute(self, table: TTableSchema, bucket_path: str) -> None: + # get format + ext = os.path.splitext(bucket_path)[1][1:] + if ext == "parquet": + if table_schema_has_type(table, "time"): + # Synapse interprets Parquet TIME columns as bigint, resulting in + # an incompatibility error. + raise LoadJobTerminalException( + self.file_name(), + "Synapse cannot load TIME columns from Parquet files. Switch to direct INSERT" + " file format or convert `datetime.time` objects in your data to `str` or" + " `datetime.datetime`", + ) + file_type = "PARQUET" + + # dlt-generated DDL statements will still create the table, but + # enabling AUTO_CREATE_TABLE prevents a MalformedInputException. + auto_create_table = "ON" + else: + raise ValueError(f"Unsupported file type {ext} for Synapse.") + + staging_credentials = self._staging_credentials + assert staging_credentials is not None + assert isinstance(staging_credentials, AzureCredentialsWithoutDefaults) + azure_storage_account_name = staging_credentials.azure_storage_account_name + https_path = self._get_https_path(bucket_path, azure_storage_account_name) + table_name = table["name"] + + if self.staging_use_msi: + credential = "IDENTITY = 'Managed Identity'" + else: + sas_token = staging_credentials.azure_storage_sas_token + credential = f"IDENTITY = 'Shared Access Signature', SECRET = '{sas_token}'" + + # Copy data from staging file into Synapse table. + with self._sql_client.begin_transaction(): + dataset_name = self._sql_client.dataset_name + sql = dedent(f""" + COPY INTO [{dataset_name}].[{table_name}] + FROM '{https_path}' + WITH ( + FILE_TYPE = '{file_type}', + CREDENTIAL = ({credential}), + AUTO_CREATE_TABLE = '{auto_create_table}' + ) + """) + self._sql_client.execute_sql(sql) + + def exception(self) -> str: + # this part of code should be never reached + raise NotImplementedError() + + def _get_https_path(self, bucket_path: str, storage_account_name: str) -> str: + """ + Converts a path in the form of az:/// to + https://.blob.core.windows.net// + as required by Synapse. 
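The path conversion described above can be reproduced with the standard library alone. An illustrative example with a made-up container, load path, and storage account name:

```python
# Illustrative only: how an az:// staging path maps to the https:// form that
# Synapse COPY INTO expects; all names here are hypothetical.
from urllib.parse import urlparse, urlunparse

bucket_path = "az://my-container/load_id/file.parquet"
storage_account_name = "mystorageaccount"

url = urlparse(bucket_path)
https_url = url._replace(
    scheme="https",
    netloc=f"{storage_account_name}.blob.core.windows.net",
    path="/" + url.netloc + url.path,
)
print(urlunparse(https_url))
# https://mystorageaccount.blob.core.windows.net/my-container/load_id/file.parquet
```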
+ """ + bucket_url = urlparse(bucket_path) + # "blob" endpoint has better performance than "dfs" endoint + # https://learn.microsoft.com/en-us/sql/t-sql/statements/copy-into-transact-sql?view=azure-sqldw-latest#external-locations + endpoint = "blob" + _path = "/" + bucket_url.netloc + bucket_url.path + https_url = bucket_url._replace( + scheme="https", + netloc=f"{storage_account_name}.{endpoint}.core.windows.net", + path=_path, + ) + return urlunparse(https_url) diff --git a/poetry.lock b/poetry.lock index 4d079fc44d..400bcb61e2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8466,10 +8466,10 @@ qdrant = ["qdrant-client"] redshift = ["psycopg2-binary", "psycopg2cffi"] s3 = ["botocore", "s3fs"] snowflake = ["snowflake-connector-python"] -synapse = ["pyodbc"] +synapse = ["adlfs", "pyodbc"] weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "26c595a857f17a5cbdb348f165c267d8910412325be4e522d0e91224c7fec588" +content-hash = "75a5f533e9456898ad0157b699d76d9c5a1abf8f4cd04ed7be2235ae3198e16c" diff --git a/pyproject.toml b/pyproject.toml index d9d5858674..f6ae77b593 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ cli = ["pipdeptree", "cron-descriptor"] athena = ["pyathena", "pyarrow", "s3fs", "botocore"] weaviate = ["weaviate-client"] mssql = ["pyodbc"] -synapse = ["pyodbc"] +synapse = ["pyodbc", "adlfs"] qdrant = ["qdrant-client"] [tool.poetry.scripts] diff --git a/tests/load/pipeline/test_pipelines.py b/tests/load/pipeline/test_pipelines.py index d170fd553b..304f1a0d2f 100644 --- a/tests/load/pipeline/test_pipelines.py +++ b/tests/load/pipeline/test_pipelines.py @@ -788,7 +788,7 @@ def other_data(): column_schemas["col11_precision"]["precision"] = 0 # drop TIME from databases not supporting it via parquet - if destination_config.destination in ["redshift", "athena"]: + if destination_config.destination in ["redshift", "athena", "synapse"]: data_types.pop("col11") data_types.pop("col11_null") data_types.pop("col11_precision") @@ -827,15 +827,16 @@ def some_source(): assert len(package_info.jobs["completed_jobs"]) == expected_completed_jobs with pipeline.sql_client() as sql_client: + qual_name = sql_client.make_qualified_table_name assert [ - row[0] for row in sql_client.execute_sql("SELECT * FROM other_data ORDER BY 1") + row[0] + for row in sql_client.execute_sql(f"SELECT * FROM {qual_name('other_data')} ORDER BY 1") ] == [1, 2, 3, 4, 5] - assert [row[0] for row in sql_client.execute_sql("SELECT * FROM some_data ORDER BY 1")] == [ - 1, - 2, - 3, - ] - db_rows = sql_client.execute_sql("SELECT * FROM data_types") + assert [ + row[0] + for row in sql_client.execute_sql(f"SELECT * FROM {qual_name('some_data')} ORDER BY 1") + ] == [1, 2, 3] + db_rows = sql_client.execute_sql(f"SELECT * FROM {qual_name('data_types')}") assert len(db_rows) == 10 db_row = list(db_rows[0]) # "snowflake" and "bigquery" do not parse JSON form parquet string so double parse diff --git a/tests/load/pipeline/test_stage_loading.py b/tests/load/pipeline/test_stage_loading.py index de4a7f4c3b..ca27cf4b05 100644 --- a/tests/load/pipeline/test_stage_loading.py +++ b/tests/load/pipeline/test_stage_loading.py @@ -94,7 +94,13 @@ def test_staging_load(destination_config: DestinationTestConfiguration) -> None: # check item of first row in db with pipeline.sql_client() as sql_client: - rows = sql_client.execute_sql("SELECT url FROM issues WHERE id = 388089021 LIMIT 1") + if destination_config.destination in ["mssql", "synapse"]: + qual_name = 
sql_client.make_qualified_table_name + rows = sql_client.execute_sql( + f"SELECT TOP 1 url FROM {qual_name('issues')} WHERE id = 388089021" + ) + else: + rows = sql_client.execute_sql("SELECT url FROM issues WHERE id = 388089021 LIMIT 1") assert rows[0][0] == "https://api.github.com/repos/duckdb/duckdb/issues/71" if destination_config.supports_merge: @@ -109,10 +115,23 @@ def test_staging_load(destination_config: DestinationTestConfiguration) -> None: # check changes where merged in with pipeline.sql_client() as sql_client: - rows = sql_client.execute_sql("SELECT number FROM issues WHERE id = 1232152492 LIMIT 1") - assert rows[0][0] == 105 - rows = sql_client.execute_sql("SELECT number FROM issues WHERE id = 1142699354 LIMIT 1") - assert rows[0][0] == 300 + if destination_config.destination in ["mssql", "synapse"]: + qual_name = sql_client.make_qualified_table_name + rows_1 = sql_client.execute_sql( + f"SELECT TOP 1 number FROM {qual_name('issues')} WHERE id = 1232152492" + ) + rows_2 = sql_client.execute_sql( + f"SELECT TOP 1 number FROM {qual_name('issues')} WHERE id = 1142699354" + ) + else: + rows_1 = sql_client.execute_sql( + "SELECT number FROM issues WHERE id = 1232152492 LIMIT 1" + ) + rows_2 = sql_client.execute_sql( + "SELECT number FROM issues WHERE id = 1142699354 LIMIT 1" + ) + assert rows_1[0][0] == 105 + assert rows_2[0][0] == 300 # test append info = pipeline.run( @@ -161,6 +180,9 @@ def test_all_data_types(destination_config: DestinationTestConfiguration) -> Non ) and destination_config.file_format in ("parquet", "jsonl"): # Redshift copy doesn't support TIME column exclude_types.append("time") + if destination_config.destination == "synapse" and destination_config.file_format == "parquet": + # TIME columns are not supported for staged parquet loads into Synapse + exclude_types.append("time") if destination_config.destination == "redshift" and destination_config.file_format in ( "parquet", "jsonl", @@ -199,7 +221,8 @@ def my_source(): assert_load_info(info) with pipeline.sql_client() as sql_client: - db_rows = sql_client.execute_sql("SELECT * FROM data_types") + qual_name = sql_client.make_qualified_table_name + db_rows = sql_client.execute_sql(f"SELECT * FROM {qual_name('data_types')}") assert len(db_rows) == 10 db_row = list(db_rows[0]) # parquet is not really good at inserting json, best we get are strings in JSON columns diff --git a/tests/load/utils.py b/tests/load/utils.py index 55445e0b95..207e32209f 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -95,6 +95,7 @@ class DestinationTestConfiguration: bucket_url: Optional[str] = None stage_name: Optional[str] = None staging_iam_role: Optional[str] = None + staging_use_msi: bool = False extra_info: Optional[str] = None supports_merge: bool = True # TODO: take it from client base class force_iceberg: bool = False @@ -118,6 +119,7 @@ def setup(self) -> None: os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = self.bucket_url or "" os.environ["DESTINATION__STAGE_NAME"] = self.stage_name or "" os.environ["DESTINATION__STAGING_IAM_ROLE"] = self.staging_iam_role or "" + os.environ["DESTINATION__STAGING_USE_MSI"] = str(self.staging_use_msi) or "" os.environ["DESTINATION__FORCE_ICEBERG"] = str(self.force_iceberg) or "" """For the filesystem destinations we disable compression to make analyzing the result easier""" @@ -254,6 +256,21 @@ def destinations_configs( bucket_url=AZ_BUCKET, extra_info="az-authorization", ), + DestinationTestConfiguration( + destination="synapse", + staging="filesystem", + 
file_format="parquet", + bucket_url=AZ_BUCKET, + extra_info="az-authorization", + ), + DestinationTestConfiguration( + destination="synapse", + staging="filesystem", + file_format="parquet", + bucket_url=AZ_BUCKET, + staging_use_msi=True, + extra_info="az-managed-identity", + ), ] if all_staging_configs: From 7868ca6bfd54ff691e8e84384a65c7b9c55a00f4 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Fri, 26 Jan 2024 21:36:42 +0100 Subject: [PATCH 08/23] made table index type logic Synapse specific through destination adapter --- dlt/common/destination/reference.py | 4 +- dlt/common/schema/typing.py | 3 -- dlt/common/schema/utils.py | 12 ----- dlt/destinations/adapters.py | 3 +- .../impl/qdrant/qdrant_adapter.py | 11 +--- dlt/destinations/impl/synapse/__init__.py | 2 + .../impl/synapse/configuration.py | 4 +- dlt/destinations/impl/synapse/factory.py | 4 +- dlt/destinations/impl/synapse/synapse.py | 23 ++++++--- .../impl/synapse/synapse_adapter.py | 50 +++++++++++++++++++ .../impl/weaviate/weaviate_adapter.py | 11 +--- dlt/destinations/utils.py | 16 ++++++ dlt/extract/decorators.py | 7 --- dlt/extract/hints.py | 3 -- .../test_table_indexing.py | 46 ++++++++--------- 15 files changed, 117 insertions(+), 82 deletions(-) create mode 100644 dlt/destinations/impl/synapse/synapse_adapter.py create mode 100644 dlt/destinations/utils.py rename tests/load/{pipeline => synapse}/test_table_indexing.py (81%) diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 59f13b30b9..1c28dffa8c 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -34,7 +34,7 @@ from dlt.common.schema import Schema, TTableSchema, TSchemaTables from dlt.common.schema.typing import TWriteDisposition from dlt.common.schema.exceptions import InvalidDatasetName -from dlt.common.schema.utils import get_write_disposition, get_table_format, get_table_index_type +from dlt.common.schema.utils import get_write_disposition, get_table_format from dlt.common.configuration import configspec, with_config, resolve_configuration, known_sections from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration from dlt.common.configuration.accessors import config @@ -372,8 +372,6 @@ def get_load_table(self, table_name: str, prepare_for_staging: bool = False) -> table["write_disposition"] = get_write_disposition(self.schema.tables, table_name) if "table_format" not in table: table["table_format"] = get_table_format(self.schema.tables, table_name) - if "table_index_type" not in table: - table["table_index_type"] = get_table_index_type(self.schema.tables, table_name) return table except KeyError: raise UnknownTableException(table_name) diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index 351d666553..9a27cbe4bb 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -62,8 +62,6 @@ """Known hints of a column used to declare hint regexes.""" TWriteDisposition = Literal["skip", "append", "replace", "merge"] TTableFormat = Literal["iceberg"] -TTableIndexType = Literal["heap", "clustered_columnstore_index"] -"Table index type. Currently only used for Synapse destination." 
TTypeDetections = Literal[ "timestamp", "iso_timestamp", "iso_date", "large_integer", "hexbytes_to_text", "wei_to_double" ] @@ -167,7 +165,6 @@ class TTableSchema(TypedDict, total=False): columns: TTableSchemaColumns resource: Optional[str] table_format: Optional[TTableFormat] - table_index_type: Optional[TTableIndexType] class TPartialTableSchema(TTableSchema): diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index 5ea244148e..dc243f50dd 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -32,7 +32,6 @@ TColumnSchema, TColumnProp, TTableFormat, - TTableIndexType, TColumnHint, TTypeDetectionFunc, TTypeDetections, @@ -619,14 +618,6 @@ def get_table_format(tables: TSchemaTables, table_name: str) -> TTableFormat: ) -def get_table_index_type(tables: TSchemaTables, table_name: str) -> TTableIndexType: - """Returns table index type of a table if present. If not, looks up into parent table.""" - return cast( - TTableIndexType, - get_inherited_table_hint(tables, table_name, "table_index_type", allow_none=True), - ) - - def table_schema_has_type(table: TTableSchema, _typ: TDataType) -> bool: """Checks if `table` schema contains column with type _typ""" return any(c.get("data_type") == _typ for c in table["columns"].values()) @@ -733,7 +724,6 @@ def new_table( resource: str = None, schema_contract: TSchemaContract = None, table_format: TTableFormat = None, - table_index_type: TTableIndexType = None, ) -> TTableSchema: table: TTableSchema = { "name": table_name, @@ -752,8 +742,6 @@ def new_table( table["schema_contract"] = schema_contract if table_format: table["table_format"] = table_format - if table_index_type is not None: - table["table_index_type"] = table_index_type if validate_schema: validate_dict_ignoring_xkeys( spec=TColumnSchema, diff --git a/dlt/destinations/adapters.py b/dlt/destinations/adapters.py index b8f12599dc..22c98d4f5a 100644 --- a/dlt/destinations/adapters.py +++ b/dlt/destinations/adapters.py @@ -2,5 +2,6 @@ from dlt.destinations.impl.weaviate import weaviate_adapter from dlt.destinations.impl.qdrant import qdrant_adapter +from dlt.destinations.impl.synapse import synapse_adapter -__all__ = ["weaviate_adapter", "qdrant_adapter"] +__all__ = ["weaviate_adapter", "qdrant_adapter", "synapse_adapter"] diff --git a/dlt/destinations/impl/qdrant/qdrant_adapter.py b/dlt/destinations/impl/qdrant/qdrant_adapter.py index 243cbd6c5b..215d87a920 100644 --- a/dlt/destinations/impl/qdrant/qdrant_adapter.py +++ b/dlt/destinations/impl/qdrant/qdrant_adapter.py @@ -2,6 +2,7 @@ from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns from dlt.extract import DltResource, resource as make_resource +from dlt.destinations.utils import ensure_resource VECTORIZE_HINT = "x-qdrant-embed" @@ -31,15 +32,7 @@ def qdrant_adapter( >>> qdrant_adapter(data, embed="description") [DltResource with hints applied] """ - # wrap `data` in a resource if not an instance already - resource: DltResource - if not isinstance(data, DltResource): - resource_name: str = None - if not hasattr(data, "__name__"): - resource_name = "content" - resource = make_resource(data, name=resource_name) - else: - resource = data + resource = ensure_resource(data) column_hints: TTableSchemaColumns = {} diff --git a/dlt/destinations/impl/synapse/__init__.py b/dlt/destinations/impl/synapse/__init__.py index 639d8a598f..53dbabc090 100644 --- a/dlt/destinations/impl/synapse/__init__.py +++ b/dlt/destinations/impl/synapse/__init__.py @@ -3,6 +3,8 @@ from dlt.common.arithmetics import 
DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.wei import EVM_DECIMAL_PRECISION +from dlt.destinations.impl.synapse.synapse_adapter import synapse_adapter + def capabilities() -> DestinationCapabilitiesContext: caps = DestinationCapabilitiesContext() diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py index 34b227a2ac..cc0e40114b 100644 --- a/dlt/destinations/impl/synapse/configuration.py +++ b/dlt/destinations/impl/synapse/configuration.py @@ -2,7 +2,7 @@ from dlt.common import logger from dlt.common.configuration import configspec -from dlt.common.schema.typing import TTableIndexType, TSchemaTables +from dlt.common.schema.typing import TSchemaTables from dlt.common.schema.utils import get_write_disposition from dlt.destinations.impl.mssql.configuration import ( @@ -11,6 +11,8 @@ ) from dlt.destinations.impl.mssql.configuration import MsSqlCredentials +from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType + @configspec class SynapseCredentials(MsSqlCredentials): diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py index 3d951f3d4a..0ac58001ca 100644 --- a/dlt/destinations/impl/synapse/factory.py +++ b/dlt/destinations/impl/synapse/factory.py @@ -1,13 +1,13 @@ import typing as t from dlt.common.destination import Destination, DestinationCapabilitiesContext -from dlt.common.schema.typing import TTableIndexType -from dlt.destinations.impl.synapse import capabilities +from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.configuration import ( SynapseCredentials, SynapseClientConfiguration, ) +from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType if t.TYPE_CHECKING: from dlt.destinations.impl.synapse.synapse import SynapseClient diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index c29c0df3f5..d34fef1ab4 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -12,8 +12,8 @@ ) from dlt.common.schema import TTableSchema, TColumnSchema, Schema, TColumnHint -from dlt.common.schema.utils import table_schema_has_type -from dlt.common.schema.typing import TTableSchemaColumns, TTableIndexType +from dlt.common.schema.utils import table_schema_has_type, get_inherited_table_hint +from dlt.common.schema.typing import TTableSchemaColumns from dlt.common.configuration.specs import AzureCredentialsWithoutDefaults @@ -34,6 +34,10 @@ from dlt.destinations.impl.synapse import capabilities from dlt.destinations.impl.synapse.sql_client import SynapseSqlClient from dlt.destinations.impl.synapse.configuration import SynapseClientConfiguration +from dlt.destinations.impl.synapse.synapse_adapter import ( + TABLE_INDEX_TYPE_HINT, + TTableIndexType, +) HINT_TO_SYNAPSE_ATTR: Dict[TColumnHint, str] = { @@ -68,7 +72,7 @@ def _get_table_update_sql( if table is None: table_index_type = self.config.default_table_index_type else: - table_index_type = table.get("table_index_type") + table_index_type = cast(TTableIndexType, table.get(TABLE_INDEX_TYPE_HINT)) if table_index_type == "clustered_columnstore_index": new_columns = self._get_columstore_valid_columns(new_columns) @@ -128,9 +132,16 @@ def get_load_table(self, table_name: str, staging: bool = False) -> TTableSchema # configuration. Why? "For small lookup tables, less than 60 million rows, # consider using HEAP or clustered index for faster query performance." 
# https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables - table["table_index_type"] = "heap" - if table["table_index_type"] is None: - table["table_index_type"] = self.config.default_table_index_type + table[TABLE_INDEX_TYPE_HINT] = "heap" # type: ignore[typeddict-unknown-key] + elif table_name in self.schema.data_table_names(): + if TABLE_INDEX_TYPE_HINT not in table: + # If present in parent table, fetch hint from there. + table[TABLE_INDEX_TYPE_HINT] = get_inherited_table_hint( # type: ignore[typeddict-unknown-key] + self.schema.tables, table_name, TABLE_INDEX_TYPE_HINT, allow_none=True + ) + if table[TABLE_INDEX_TYPE_HINT] is None: # type: ignore[typeddict-item] + # Hint still not defined, fall back to default. + table[TABLE_INDEX_TYPE_HINT] = self.config.default_table_index_type # type: ignore[typeddict-unknown-key] return table def get_storage_table_index_type(self, table_name: str) -> TTableIndexType: diff --git a/dlt/destinations/impl/synapse/synapse_adapter.py b/dlt/destinations/impl/synapse/synapse_adapter.py new file mode 100644 index 0000000000..f135dd967a --- /dev/null +++ b/dlt/destinations/impl/synapse/synapse_adapter.py @@ -0,0 +1,50 @@ +from typing import Any, Literal, Set, get_args, Final + +from dlt.extract import DltResource, resource as make_resource +from dlt.extract.typing import TTableHintTemplate +from dlt.extract.hints import TResourceHints +from dlt.destinations.utils import ensure_resource + +TTableIndexType = Literal["heap", "clustered_columnstore_index"] +""" +Table [index type](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index) used when creating the Synapse table. +This regards indexes specified at the table level, not the column level. +""" +TABLE_INDEX_TYPES: Set[TTableIndexType] = set(get_args(TTableIndexType)) + +TABLE_INDEX_TYPE_HINT: Literal["x-table-index-type"] = "x-table-index-type" + + +def synapse_adapter(data: Any, table_index_type: TTableIndexType = None) -> DltResource: + """Prepares data for the Synapse destination by specifying which table index + type should be used. + + Args: + data (Any): The data to be transformed. It can be raw data or an instance + of DltResource. If raw data, the function wraps it into a DltResource + object. + table_index_type (TTableIndexType, optional): The table index type used when creating + the Synapse table. + + Returns: + DltResource: A resource with applied Synapse-specific hints. + + Raises: + ValueError: If input for `table_index_type` is invalid. + + Examples: + >>> data = [{"name": "Anush", "description": "Integrations Hacker"}] + >>> synapse_adapter(data, table_index_type="clustered_columnstore_index") + [DltResource with hints applied] + """ + resource = ensure_resource(data) + + if table_index_type is not None: + if table_index_type not in TABLE_INDEX_TYPES: + allowed_types = ", ".join(TABLE_INDEX_TYPES) + raise ValueError( + f"Table index type {table_index_type} is invalid. Allowed table index" + f" types are: {allowed_types}." 
+ ) + resource._hints[TABLE_INDEX_TYPE_HINT] = table_index_type # type: ignore[typeddict-unknown-key] + return resource diff --git a/dlt/destinations/impl/weaviate/weaviate_adapter.py b/dlt/destinations/impl/weaviate/weaviate_adapter.py index 2d5161d9e9..a290ac65b4 100644 --- a/dlt/destinations/impl/weaviate/weaviate_adapter.py +++ b/dlt/destinations/impl/weaviate/weaviate_adapter.py @@ -2,6 +2,7 @@ from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns from dlt.extract import DltResource, resource as make_resource +from dlt.destinations.utils import ensure_resource TTokenizationTMethod = Literal["word", "lowercase", "whitespace", "field"] TOKENIZATION_METHODS: Set[TTokenizationTMethod] = set(get_args(TTokenizationTMethod)) @@ -53,15 +54,7 @@ def weaviate_adapter( >>> weaviate_adapter(data, vectorize="description", tokenization={"description": "word"}) [DltResource with hints applied] """ - # wrap `data` in a resource if not an instance already - resource: DltResource - if not isinstance(data, DltResource): - resource_name: str = None - if not hasattr(data, "__name__"): - resource_name = "content" - resource = make_resource(data, name=resource_name) - else: - resource = data + resource = ensure_resource(data) column_hints: TTableSchemaColumns = {} if vectorize: diff --git a/dlt/destinations/utils.py b/dlt/destinations/utils.py new file mode 100644 index 0000000000..d4b945a840 --- /dev/null +++ b/dlt/destinations/utils.py @@ -0,0 +1,16 @@ +from typing import Any + +from dlt.extract import DltResource, resource as make_resource + + +def ensure_resource(data: Any) -> DltResource: + """Wraps `data` in a DltResource if it's not a DltResource already.""" + resource: DltResource + if not isinstance(data, DltResource): + resource_name: str = None + if not hasattr(data, "__name__"): + resource_name = "content" + resource = make_resource(data, name=resource_name) + else: + resource = data + return resource diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index 573d3d3ad0..cf7426e683 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -36,7 +36,6 @@ TAnySchemaColumns, TSchemaContract, TTableFormat, - TTableIndexType, ) from dlt.extract.utils import ( ensure_table_schema_columns_hint, @@ -257,7 +256,6 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, - table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, ) -> DltResource: ... @@ -275,7 +273,6 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, - table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, ) -> Callable[[Callable[TResourceFunParams, Any]], DltResource]: ... 
@@ -293,7 +290,6 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, - table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, standalone: Literal[True] = True, @@ -312,7 +308,6 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, - table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, ) -> DltResource: ... @@ -329,7 +324,6 @@ def resource( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, - table_index_type: TTableHintTemplate[TTableIndexType] = None, selected: bool = True, spec: Type[BaseConfiguration] = None, standalone: bool = False, @@ -409,7 +403,6 @@ def make_resource( merge_key=merge_key, schema_contract=schema_contract, table_format=table_format, - table_index_type=table_index_type, ) return DltResource.from_data( _data, diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index 36354eb0da..437dbbc6bd 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -12,7 +12,6 @@ TWriteDisposition, TAnySchemaColumns, TTableFormat, - TTableIndexType, TSchemaContract, ) from dlt.common.typing import TDataItem @@ -275,7 +274,6 @@ def new_table_template( merge_key: TTableHintTemplate[TColumnNames] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, table_format: TTableHintTemplate[TTableFormat] = None, - table_index_type: TTableHintTemplate[TTableIndexType] = None, ) -> TResourceHints: validator, schema_contract = create_item_validator(columns, schema_contract) clean_columns = columns @@ -291,7 +289,6 @@ def new_table_template( columns=clean_columns, # type: ignore schema_contract=schema_contract, # type: ignore table_format=table_format, # type: ignore - table_index_type=table_index_type, # type: ignore ) if not table_name: new_template.pop("name") diff --git a/tests/load/pipeline/test_table_indexing.py b/tests/load/synapse/test_table_indexing.py similarity index 81% rename from tests/load/pipeline/test_table_indexing.py rename to tests/load/synapse/test_table_indexing.py index 5f62cddfee..097bde09f9 100644 --- a/tests/load/pipeline/test_table_indexing.py +++ b/tests/load/synapse/test_table_indexing.py @@ -5,16 +5,13 @@ import dlt from dlt.common.schema import TColumnSchema -from dlt.common.schema.typing import TTableIndexType, TSchemaTables -from dlt.common.schema.utils import get_table_index_type from dlt.destinations.sql_client import SqlClientBase +from dlt.destinations.impl.synapse import synapse_adapter +from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType + from tests.load.utils import TABLE_UPDATE, TABLE_ROW_ALL_DATA_TYPES -from tests.load.pipeline.utils import ( - destinations_configs, - DestinationTestConfiguration, -) TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID = [ @@ -27,16 +24,10 @@ ] -@pytest.mark.parametrize( - "destination_config", - destinations_configs(default_sql_configs=True, subset=["synapse"]), - ids=lambda x: x.name, -) @pytest.mark.parametrize( "table_index_type,column_schema", TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID ) def test_default_table_index_type_configuration( - destination_config: 
DestinationTestConfiguration, table_index_type: TTableIndexType, column_schema: Union[List[TColumnSchema], None], ) -> None: @@ -51,10 +42,13 @@ def test_default_table_index_type_configuration( def items_without_table_index_type_specified() -> Iterator[Any]: yield TABLE_ROW_ALL_DATA_TYPES - pipeline = destination_config.setup_pipeline( - f"test_default_table_index_type_{table_index_type}", + pipeline = dlt.pipeline( + pipeline_name=f"test_default_table_index_type_{table_index_type}", + destination="synapse", + dataset_name=f"test_default_table_index_type_{table_index_type}", full_refresh=True, ) + job_client = pipeline.destination_client() # Assert configuration value gets properly propagated to job client configuration. assert job_client.config.default_table_index_type == table_index_type # type: ignore[attr-defined] @@ -80,13 +74,14 @@ def items_without_table_index_type_specified() -> Iterator[Any]: @dlt.resource( name="items_with_table_index_type_specified", write_disposition="append", - table_index_type="clustered_columnstore_index", columns=column_schema, ) def items_with_table_index_type_specified() -> Iterator[Any]: yield TABLE_ROW_ALL_DATA_TYPES - pipeline.run(items_with_table_index_type_specified) + pipeline.run( + synapse_adapter(items_with_table_index_type_specified, "clustered_columnstore_index") + ) applied_table_index_type = job_client.get_storage_table_index_type( # type: ignore[attr-defined] "items_with_table_index_type_specified" ) @@ -95,35 +90,34 @@ def items_with_table_index_type_specified() -> Iterator[Any]: assert applied_table_index_type == "clustered_columnstore_index" -@pytest.mark.parametrize( - "destination_config", - destinations_configs(default_sql_configs=True, subset=["synapse"]), - ids=lambda x: x.name, -) @pytest.mark.parametrize( "table_index_type,column_schema", TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID ) def test_resource_table_index_type_configuration( - destination_config: DestinationTestConfiguration, table_index_type: TTableIndexType, column_schema: Union[List[TColumnSchema], None], ) -> None: @dlt.resource( name="items_with_table_index_type_specified", write_disposition="append", - table_index_type=table_index_type, columns=column_schema, ) def items_with_table_index_type_specified() -> Iterator[Any]: yield TABLE_ROW_ALL_DATA_TYPES - pipeline = destination_config.setup_pipeline( - f"test_table_index_type_{table_index_type}", + pipeline = dlt.pipeline( + pipeline_name=f"test_table_index_type_{table_index_type}", + destination="synapse", + dataset_name=f"test_table_index_type_{table_index_type}", full_refresh=True, ) + # An invalid value for `table_index_type` should raise a ValueError. + with pytest.raises(ValueError): + pipeline.run(synapse_adapter(items_with_table_index_type_specified, "foo")) # type: ignore[arg-type] + # Run the pipeline and create the tables. - pipeline.run(items_with_table_index_type_specified) + pipeline.run(synapse_adapter(items_with_table_index_type_specified, table_index_type)) # For all tables, assert the applied index type equals the expected index type. # Child tables, if any, inherit the index type of their parent. 
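For reference, a minimal sketch of how the `synapse_adapter` exercised by the tests above is meant to be used from user code; the pipeline and dataset names are illustrative assumptions:

```python
import dlt
from dlt.destinations.impl.synapse import synapse_adapter

@dlt.resource(name="items", write_disposition="append")
def items():
    yield [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]

pipeline = dlt.pipeline(
    pipeline_name="synapse_index_demo",  # illustrative name
    destination="synapse",
    dataset_name="synapse_index_demo",
    full_refresh=True,
)

# the adapter attaches the "x-table-index-type" hint to the resource;
# the Synapse job client reads it when generating CREATE TABLE statements
pipeline.run(synapse_adapter(items, "clustered_columnstore_index"))
```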
From b4cdd36e41af7e13849e255133a2654dde79ac7e Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Fri, 26 Jan 2024 22:06:11 +0100 Subject: [PATCH 09/23] moved test function into tests folder and renamed test file --- dlt/destinations/impl/synapse/synapse.py | 18 -------------- ...xing.py => test_synapse_table_indexing.py} | 10 ++++---- tests/load/synapse/utils.py | 24 +++++++++++++++++++ 3 files changed, 30 insertions(+), 22 deletions(-) rename tests/load/synapse/{test_table_indexing.py => test_synapse_table_indexing.py} (91%) create mode 100644 tests/load/synapse/utils.py diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index d34fef1ab4..eb6eae3f20 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -144,24 +144,6 @@ def get_load_table(self, table_name: str, staging: bool = False) -> TTableSchema table[TABLE_INDEX_TYPE_HINT] = self.config.default_table_index_type # type: ignore[typeddict-unknown-key] return table - def get_storage_table_index_type(self, table_name: str) -> TTableIndexType: - """Returns table index type of table in storage destination.""" - with self.sql_client as sql_client: - schema_name = sql_client.fully_qualified_dataset_name(escape=False) - sql = dedent(f""" - SELECT - CASE i.type_desc - WHEN 'HEAP' THEN 'heap' - WHEN 'CLUSTERED COLUMNSTORE' THEN 'clustered_columnstore_index' - END AS table_index_type - FROM sys.indexes i - INNER JOIN sys.tables t ON t.object_id = i.object_id - INNER JOIN sys.schemas s ON s.schema_id = t.schema_id - WHERE s.name = '{schema_name}' AND t.name = '{table_name}' - """) - table_index_type = sql_client.execute_sql(sql)[0][0] - return cast(TTableIndexType, table_index_type) - def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: job = super().start_file_load(table, file_path, load_id) if not job: diff --git a/tests/load/synapse/test_table_indexing.py b/tests/load/synapse/test_synapse_table_indexing.py similarity index 91% rename from tests/load/synapse/test_table_indexing.py rename to tests/load/synapse/test_synapse_table_indexing.py index 097bde09f9..af4786af9f 100644 --- a/tests/load/synapse/test_table_indexing.py +++ b/tests/load/synapse/test_synapse_table_indexing.py @@ -12,6 +12,7 @@ from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType from tests.load.utils import TABLE_UPDATE, TABLE_ROW_ALL_DATA_TYPES +from tests.load.synapse.utils import get_storage_table_index_type TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID = [ @@ -60,7 +61,7 @@ def items_without_table_index_type_specified() -> Iterator[Any]: # Child tables, if any, inherit the index type of their parent. tables = pipeline.default_schema.tables for table_name in tables: - applied_table_index_type = job_client.get_storage_table_index_type(table_name) # type: ignore[attr-defined] + applied_table_index_type = get_storage_table_index_type(job_client.sql_client, table_name) # type: ignore[attr-defined] if table_name in pipeline.default_schema.data_table_names(): # For data tables, the applied table index type should be the default value. 
assert applied_table_index_type == job_client.config.default_table_index_type # type: ignore[attr-defined] @@ -82,8 +83,9 @@ def items_with_table_index_type_specified() -> Iterator[Any]: pipeline.run( synapse_adapter(items_with_table_index_type_specified, "clustered_columnstore_index") ) - applied_table_index_type = job_client.get_storage_table_index_type( # type: ignore[attr-defined] - "items_with_table_index_type_specified" + applied_table_index_type = get_storage_table_index_type( + job_client.sql_client, # type: ignore[attr-defined] + "items_with_table_index_type_specified", ) # While the default is "heap", the applied index type should be "clustered_columnstore_index" # because it was provided as argument to the resource. @@ -124,7 +126,7 @@ def items_with_table_index_type_specified() -> Iterator[Any]: job_client = pipeline.destination_client() tables = pipeline.default_schema.tables for table_name in tables: - applied_table_index_type = job_client.get_storage_table_index_type(table_name) # type: ignore[attr-defined] + applied_table_index_type = get_storage_table_index_type(job_client.sql_client, table_name) # type: ignore[attr-defined] if table_name in pipeline.default_schema.data_table_names(): # For data tables, the applied table index type should be the type # configured in the resource. diff --git a/tests/load/synapse/utils.py b/tests/load/synapse/utils.py new file mode 100644 index 0000000000..cd53716878 --- /dev/null +++ b/tests/load/synapse/utils.py @@ -0,0 +1,24 @@ +from typing import cast +from textwrap import dedent + +from dlt.destinations.impl.synapse.sql_client import SynapseSqlClient +from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType + + +def get_storage_table_index_type(sql_client: SynapseSqlClient, table_name: str) -> TTableIndexType: + """Returns table index type of table in storage destination.""" + with sql_client: + schema_name = sql_client.fully_qualified_dataset_name(escape=False) + sql = dedent(f""" + SELECT + CASE i.type_desc + WHEN 'HEAP' THEN 'heap' + WHEN 'CLUSTERED COLUMNSTORE' THEN 'clustered_columnstore_index' + END AS table_index_type + FROM sys.indexes i + INNER JOIN sys.tables t ON t.object_id = i.object_id + INNER JOIN sys.schemas s ON s.schema_id = t.schema_id + WHERE s.name = '{schema_name}' AND t.name = '{table_name}' + """) + table_index_type = sql_client.execute_sql(sql)[0][0] + return cast(TTableIndexType, table_index_type) From 97f66e28681e53fbfa8fac060f4f85d5cf05b82d Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sat, 27 Jan 2024 10:25:19 +0100 Subject: [PATCH 10/23] ensure test data gets removed --- tests/load/synapse/test_synapse_table_indexing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/load/synapse/test_synapse_table_indexing.py b/tests/load/synapse/test_synapse_table_indexing.py index af4786af9f..e87b83fa3f 100644 --- a/tests/load/synapse/test_synapse_table_indexing.py +++ b/tests/load/synapse/test_synapse_table_indexing.py @@ -12,6 +12,9 @@ from dlt.destinations.impl.synapse.synapse_adapter import TTableIndexType from tests.load.utils import TABLE_UPDATE, TABLE_ROW_ALL_DATA_TYPES +from tests.load.pipeline.utils import ( + drop_pipeline, +) # this import ensures all test data gets removed from tests.load.synapse.utils import get_storage_table_index_type From 90685e7105c3b3f7c2a5981359fd6453cdafc721 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sat, 27 Jan 2024 11:30:00 +0100 Subject: [PATCH 11/23] add pyarrow to synapse dependencies for parquet loading --- poetry.lock | 4 ++-- 
pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 400bcb61e2..6b5625e10a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8466,10 +8466,10 @@ qdrant = ["qdrant-client"] redshift = ["psycopg2-binary", "psycopg2cffi"] s3 = ["botocore", "s3fs"] snowflake = ["snowflake-connector-python"] -synapse = ["adlfs", "pyodbc"] +synapse = ["adlfs", "pyarrow", "pyodbc"] weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "75a5f533e9456898ad0157b699d76d9c5a1abf8f4cd04ed7be2235ae3198e16c" +content-hash = "61fa24ff52200b5bf97906a376826f00350abc8f6810fb2fcea73abaf245437f" diff --git a/pyproject.toml b/pyproject.toml index f6ae77b593..fab301ad02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ cli = ["pipdeptree", "cron-descriptor"] athena = ["pyathena", "pyarrow", "s3fs", "botocore"] weaviate = ["weaviate-client"] mssql = ["pyodbc"] -synapse = ["pyodbc", "adlfs"] +synapse = ["pyodbc", "adlfs", "pyarrow"] qdrant = ["qdrant-client"] [tool.poetry.scripts] From 494e45b7b15ca041bdce15e66fcd52df59096b4d Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sun, 28 Jan 2024 00:35:10 +0100 Subject: [PATCH 12/23] added user docs for synapse destination --- dlt/destinations/impl/synapse/README.md | 58 ----- .../docs/dlt-ecosystem/destinations/mssql.md | 14 +- .../dlt-ecosystem/destinations/synapse.md | 208 ++++++++++++++++++ 3 files changed, 214 insertions(+), 66 deletions(-) delete mode 100644 dlt/destinations/impl/synapse/README.md create mode 100644 docs/website/docs/dlt-ecosystem/destinations/synapse.md diff --git a/dlt/destinations/impl/synapse/README.md b/dlt/destinations/impl/synapse/README.md deleted file mode 100644 index b133faf67a..0000000000 --- a/dlt/destinations/impl/synapse/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# Set up loader user -Execute the following SQL statements to set up the [loader](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql/data-loading-best-practices#create-a-loading-user) user: -```sql --- on master database - -CREATE LOGIN loader WITH PASSWORD = 'YOUR_LOADER_PASSWORD_HERE'; -``` - -```sql --- on minipool database - -CREATE USER loader FOR LOGIN loader; - --- DDL permissions -GRANT CREATE TABLE ON DATABASE :: minipool TO loader; -GRANT CREATE VIEW ON DATABASE :: minipool TO loader; - --- DML permissions -GRANT SELECT ON DATABASE :: minipool TO loader; -GRANT INSERT ON DATABASE :: minipool TO loader; -GRANT ADMINISTER DATABASE BULK OPERATIONS TO loader; -``` - -```sql --- https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-workload-isolation - -CREATE WORKLOAD GROUP DataLoads -WITH ( - MIN_PERCENTAGE_RESOURCE = 0 - ,CAP_PERCENTAGE_RESOURCE = 50 - ,REQUEST_MIN_RESOURCE_GRANT_PERCENT = 25 -); - -CREATE WORKLOAD CLASSIFIER [wgcELTLogin] -WITH ( - WORKLOAD_GROUP = 'DataLoads' - ,MEMBERNAME = 'loader' -); -``` - -# config.toml -```toml -[destination.synapse.credentials] -database = "minipool" -username = "loader" -host = "dlt-synapse-ci.sql.azuresynapse.net" -port = 1433 -driver = "ODBC Driver 18 for SQL Server" - -[destination.synapse] -create_indexes = false -``` - -# secrets.toml -```toml -[destination.synapse.credentials] -password = "YOUR_LOADER_PASSWORD_HERE" -``` \ No newline at end of file diff --git a/docs/website/docs/dlt-ecosystem/destinations/mssql.md b/docs/website/docs/dlt-ecosystem/destinations/mssql.md index d64cf9b400..e98f8bf256 100644 --- 
a/docs/website/docs/dlt-ecosystem/destinations/mssql.md +++ b/docs/website/docs/dlt-ecosystem/destinations/mssql.md @@ -16,16 +16,14 @@ pip install dlt[mssql] ### Prerequisites -Microsoft ODBC driver for SQL Server must be installed to use this destination. -This can't be included with `dlt`s python dependencies so you must installed it separately on your system. +_Microsoft ODBC Driver for SQL Server_ must be installed to use this destination. +This can't be included with `dlt`'s python dependencies, so you must install it separately on your system. You can find the official installation instructions [here](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16). -See instructions here to [install Microsoft ODBC Driver 18 for SQL Server on Windows, Mac and Linux](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16) +Supported driver versions: +* `ODBC Driver 18 for SQL Server` +* `ODBC Driver 17 for SQL Server` -Following ODBC drivers are supported: -* ODBC Driver 18 for SQL Server -* ODBC Driver 17 for SQL Server - -[You can configure driver name explicitly](#additional-destination-options) as well. +You can [configure driver name](#additional-destination-options) explicitly as well. ### Create a pipeline diff --git a/docs/website/docs/dlt-ecosystem/destinations/synapse.md b/docs/website/docs/dlt-ecosystem/destinations/synapse.md new file mode 100644 index 0000000000..4d66714ce3 --- /dev/null +++ b/docs/website/docs/dlt-ecosystem/destinations/synapse.md @@ -0,0 +1,208 @@ +--- +title: Azure Synapse +description: Azure Synapse `dlt` destination +keywords: [synapse, destination, data warehouse] +--- + +# Synapse + +## Install dlt with Synapse +**To install the DLT library with Synapse dependencies:** +``` +pip install dlt[synapse] +``` + +## Setup guide + +### Prerequisites + +* **Microsoft ODBC Driver for SQL Server** + + _Microsoft ODBC Driver for SQL Server_ must be installed to use this destination. + This can't be included with `dlt`'s python dependencies, so you must install it separately on your system. You can find the official installation instructions [here](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16). + + Supported driver versions: + * `ODBC Driver 18 for SQL Server` + + > 💡 Older driver versions don't properly work, because they don't support the `LongAsMax` keyword that got [introduced](https://learn.microsoft.com/en-us/sql/connect/odbc/windows/features-of-the-microsoft-odbc-driver-for-sql-server-on-windows?view=sql-server-ver15#microsoft-odbc-driver-180-for-sql-server-on-windows) in `ODBC Driver 18 for SQL Server`. Synapse does not support the legacy ["long data types"](https://learn.microsoft.com/en-us/sql/t-sql/data-types/ntext-text-and-image-transact-sql), and requires "max data types" instead. `dlt` uses the `LongAsMax` keyword to automatically do the conversion. +* **Azure Synapse Workspace and dedicated SQL pool** + + You need an Azure Synapse workspace with a dedicated SQL pool to load data into. If you don't have one yet, you can use this [quickstart](https://learn.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-sql-pool-studio). + +### Steps + +**1. Initialize a project with a pipeline that loads to Synapse by running** +``` +dlt init chess synapse +``` + +**2. 
Install the necessary dependencies for Synapse by running** +``` +pip install -r requirements.txt +``` +This will install `dlt` with the **synapse** extra that contains all dependencies required for the Synapse destination. + +**3. Create a loader user** + +Execute the following SQL statements to set up the [loader](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql/data-loading-best-practices#create-a-loading-user) user. Change the password and replace `yourpool` with the name of your dedicated SQL pool: +```sql +-- on master database, using a SQL admin account + +CREATE LOGIN loader WITH PASSWORD = 'your_loader_password'; +``` + +```sql +-- on yourpool database + +CREATE USER loader FOR LOGIN loader; + +-- DDL permissions +GRANT CREATE SCHEMA ON DATABASE :: yourpool TO loader; +GRANT CREATE TABLE ON DATABASE :: yourpool TO loader; +GRANT CREATE VIEW ON DATABASE :: yourpool TO loader; + +-- DML permissions +GRANT ADMINISTER DATABASE BULK OPERATIONS TO loader; -- only required when loading from staging Storage Account +``` + +Optionally, you can create a `WORKLOAD GROUP` and add the `loader` user as a member to manage [workload isolation](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-workload-isolation). See the [instructions](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql/data-loading-best-practices#create-a-loading-user) on setting up a loader user for an example of how to do this. + +**3. Enter your credentials into `.dlt/secrets.toml`.** + +Example, replace with your database connection info: +```toml +[destination.synapse.credentials] +database = "yourpool" +username = "loader" +password = "your_loader_password" +host = "your_synapse_workspace_name.sql.azuresynapse.net" +``` + +Equivalently, you can also pass a connection string as follows: + +```toml +# keep it at the top of your toml file! before any section starts +destination.synapse.credentials = "synapse://loader:your_loader_password@your_synapse_workspace_name.azuresynapse.net/yourpool" +``` + +To pass credentials directly you can use the `credentials` argument of `dlt.destinations.synapse(...)`: +```python +pipeline = dlt.pipeline( + pipeline_name='chess', + destination=dlt.destinations.synapse( + credentials='synapse://loader:your_loader_password@your_synapse_workspace_name.azuresynapse.net/yourpool' + ), + dataset_name='chess_data' +) +``` + +## Write disposition +All write dispositions are supported + +If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized`, the destination tables will be dropped and replaced by the staging tables with an `ALTER SCHEMA ... TRANSFER` command. Please note that this operation is **not** atomic—it involves multiple DDL commands and Synapse does not support DDL transactions. + +## Data loading +Data is loaded via `INSERT` statements by default. + +> 💡 Multi-row `INSERT INTO ... VALUES` statements are **not** possible in Synapse, because it doesn't support the [Table Value Constructor](https://learn.microsoft.com/en-us/sql/t-sql/queries/table-value-constructor-transact-sql). `dlt` uses `INSERT INTO ... SELECT ... UNION` statements as described [here](https://stackoverflow.com/a/73579830) to work around this limitation. 
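As a rough illustration of that workaround (table name and values are made up; the real statements are produced by `dlt`'s insert-values writer and may differ in detail), the load file is rendered as a chain of single-row `SELECT`s instead of a multi-row `VALUES` clause:

```python
# hypothetical rows to be inserted into a table "player"
rows = [(1, "anna"), (2, "bob")]

# join single-row SELECTs with UNION ALL instead of using VALUES (...), (...)
selects = "\nUNION ALL\n".join(f"SELECT {rid}, '{name}'" for rid, name in rows)
statement = f"INSERT INTO player(id, name)\n{selects};"
print(statement)
# INSERT INTO player(id, name)
# SELECT 1, 'anna'
# UNION ALL
# SELECT 2, 'bob';
```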
+ +## Supported file formats +* [insert-values](../file-formats/insert-format.md) is used by default +* [parquet](../file-formats/parquet.md) is used when [staging](#staging-support) is enabled + +## Data type limitations +* **Synapse cannot load `TIME` columns from `parquet` files**. `dlt` will fail such jobs permanently. Use the `insert_values` file format instead, or convert `datetime.time` objects to `str` or `datetime.datetime`, to load `TIME` columns. +* **Synapse does not have a complex/JSON/struct data type**. The `dlt` `complex` data type is mapped to the `nvarchar` type in Synapse. + +## Table index type +The [table index type](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index) of the created tables can be configured at the resource level with the `synapse_adapter`: + +```python +info = pipeline.run( + synapse_adapter( + data=your_resource, + table_index_type="clustered_columnstore_index", + ) +) +``` + +Possible values: +* `heap`: create [HEAP](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables) tables that do not have an index **(default)** +* `clustered_columnstore_index`: create [CLUSTERED COLUMNSTORE INDEX](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#clustered-columnstore-indexes) tables + + +> ❗ Important: +>* **Set `default_table_index_type` to `"clustered_columnstore_index"` if you want to change the default** (see [additional destination options](#additional-destination-options)). +>* **CLUSTERED COLUMNSTORE INDEX tables do not support the `varchar(max)`, `nvarchar(max)`, and `varbinary(max)` data types.** If you don't specify the `precision` for columns that map to any of these types, `dlt` will use the maximum lengths `varchar(4000)`, `nvarchar(4000)`, and `varbinary(8000)`. +>* **While Synapse creates CLUSTERED COLUMNSTORE INDEXES by default, `dlt` creates HEAP tables by default.** HEAP is a more robust choice, because it supports all data types and doesn't require conversions. +>* **When using the `staging-optimized` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are always created as HEAP tables**—any configuration of the table index types is ignored. The HEAP strategy makes sense + for staging tables for reasons explained [here](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables). +>* **`dlt` system tables are always created as HEAP tables, regardless of any configuration.** This is in line with Microsoft's recommendation that "for small lookup tables, less than 60 million rows, consider using HEAP or clustered index for faster query performance." +>* Child tables, if any, inherit the table index type of their parent table. + +## Supported column hints + +Synapse supports the following [column hints](https://dlthub.com/docs/general-usage/schema#tables-and-columns): + +* `primary_key` - creates a `PRIMARY KEY NONCLUSTERED NOT ENFORCED` constraint on the column +* `unique` - creates a `UNIQUE NOT ENFORCED` constraint on the column + +> ❗ These hints are **disabled by default**.
This is because the `PRIMARY KEY` and `UNIQUE` [constraints](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints) are tricky in Synapse: they are **not enforced** and can lead to inaccurate results if the user does not ensure all column values are unique. For the column hints to take effect, the `create_indexes` configuration needs to be set to `True`, see [additional destination options](#additional-destination-options). + +## Load concurrency issue +`dlt` uses threading to enable concurrent processing and [parallel loading](../../reference/performance.md#load). Concurrency does not work properly in all cases when using the `staging-optimized` [`replace` strategy](../../general-usage/full-loading.md), because Synapse suspends the CTAS queries that `dlt` uses behind the scenes and gets stuck. To prevent this from happening, `dlt` automatically sets the number of load workers to 1 to disable concurrency when replacing data using the `staging-optimized` strategy. Set `auto_disable_concurrency = "false"` if you don't want this to happen (see [additional destination options](#additional-destination-options)) + +## Staging support +Synapse supports Azure Blob Storage (both standard and [ADLS Gen2](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction)) as a file staging destination. `dlt` first uploads Parquet files to the blob container, and then instructs Synapse to read the Parquet file and load its data into a Synapse table using the [COPY INTO](https://learn.microsoft.com/en-us/sql/t-sql/statements/copy-into-transact-sql) statement. + +Please refer to the [Azure Blob Storage filesystem documentation](./filesystem.md#azure-blob-storage) to learn how to configure credentials for the staging destination. By default, `dlt` will use these credentials for both the write into the blob container, and the read from it to load into Synapse. Managed Identity authentication can be enabled through the `staging_use_msi` option (see [additional destination options](#additional-destination-options)). + +To run Synapse with staging on Azure Blob Storage: + +```python +# Create a dlt pipeline that will load +# chess player data to the synapse destination +# via staging on Azure Blob Storage +pipeline = dlt.pipeline( + pipeline_name='chess_pipeline', + destination='synapse', + staging='filesystem', # add this to activate the staging location + dataset_name='player_data' +) +``` + +## Additional destination options +The following settings can optionally be configured: +```toml +[destination.synapse] +default_table_index_type = "heap" +create_indexes = "false" +auto_disable_concurrency = "true" +staging_use_msi = "false" + +[destination.synapse.credentials] +port = "1433" +connect_timeout = 15 +``` + +`port` and `connect_timeout` can also be included in the connection string: + +```toml +# keep it at the top of your toml file! before any section starts +destination.synapse.credentials = "synapse://loader:your_loader_password@your_synapse_workspace_name.azuresynapse.net:1433/yourpool?connect_timeout=15" +``` + +Descriptions: +- `default_table_index_type` sets the [table index type](#table-index-type) that is used if no table index type is specified on the resource. +- `create_indexes` determines if `primary_key` and `unique` [column hints](#supported-column-hints) are applied. +- `auto_disable_concurrency` determines if concurrency is automatically disabled in cases where it might cause issues.
+- `staging_use_msi` determines if the Managed Identity of the Synapse workspace is used to authorize access to the [staging](#staging-support) Storage Account. Ensure the Managed Identity has the [Storage Blob Data Reader](https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles#storage-blob-data-reader) role (or a higher-privileged role) assigned on the blob container if you set this option to `"true"`. +- `port` used for the ODBC connection. +- `connect_timeout` sets the timeout for the `pyodbc` connection attempt, in seconds. + +### dbt support +Integration with [dbt](../transformations/dbt/dbt.md) is currently not supported. + +### Syncing of `dlt` state +This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination). + From e8c6b1dcf08cfe03c469526c5f98c8d1159ad539 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Sun, 28 Jan 2024 13:22:44 +0100 Subject: [PATCH 13/23] refactor dbt test skipping to prevent unnecessary venv creation --- .github/workflows/test_destination_mssql.yml | 4 ++-- tests/load/pipeline/test_dbt_helper.py | 20 +++++++++----------- tests/load/utils.py | 11 ++++++++--- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test_destination_mssql.yml b/.github/workflows/test_destination_mssql.yml index b8ea1db2d4..d1da25c067 100644 --- a/.github/workflows/test_destination_mssql.yml +++ b/.github/workflows/test_destination_mssql.yml @@ -71,11 +71,11 @@ jobs: run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml - run: | - poetry run pytest tests/load --ignore tests/load/pipeline/test_dbt_helper.py + poetry run pytest tests/load if: runner.os != 'Windows' name: Run tests Linux/MAC - run: | - poetry run pytest tests/load --ignore tests/load/pipeline/test_dbt_helper.py + poetry run pytest tests/load if: runner.os == 'Windows' name: Run tests Windows shell: cmd diff --git a/tests/load/pipeline/test_dbt_helper.py b/tests/load/pipeline/test_dbt_helper.py index e919409311..91318d0f34 100644 --- a/tests/load/pipeline/test_dbt_helper.py +++ b/tests/load/pipeline/test_dbt_helper.py @@ -28,7 +28,9 @@ def dbt_venv() -> Iterator[Venv]: @pytest.mark.parametrize( - "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name + "destination_config", + destinations_configs(default_sql_configs=True, supports_dbt=True), + ids=lambda x: x.name, ) def test_run_jaffle_package( destination_config: DestinationTestConfiguration, dbt_venv: Venv ) -> None: if destination_config.destination == "athena": pytest.skip( "dbt-athena requires database to be created and we don't do it in case of Jaffle" ) - if not destination_config.supports_dbt: - pytest.skip("dbt is not supported for this destination configuration") pipeline = destination_config.setup_pipeline("jaffle_jaffle", full_refresh=True) # get runner, pass the env from fixture dbt = dlt.dbt.package(pipeline, "https://github.com/dbt-labs/jaffle_shop.git", venv=dbt_venv) @@ -65,14 +65,13 @@ def test_run_jaffle_package( @pytest.mark.parametrize( - "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name + "destination_config", + destinations_configs(default_sql_configs=True, supports_dbt=True), + ids=lambda x: x.name, ) def test_run_chess_dbt(destination_config: DestinationTestConfiguration, dbt_venv: Venv) -> None: from docs.examples.chess.chess import chess - if not destination_config.supports_dbt: - pytest.skip("dbt is not supported for this destination configuration") - # provide
chess url via environ os.environ["CHESS_URL"] = "https://api.chess.com/pub/" @@ -117,16 +116,15 @@ def test_run_chess_dbt(destination_config: DestinationTestConfiguration, dbt_ven @pytest.mark.parametrize( - "destination_config", destinations_configs(default_sql_configs=True), ids=lambda x: x.name + "destination_config", + destinations_configs(default_sql_configs=True, supports_dbt=True), + ids=lambda x: x.name, ) def test_run_chess_dbt_to_other_dataset( destination_config: DestinationTestConfiguration, dbt_venv: Venv ) -> None: from docs.examples.chess.chess import chess - if not destination_config.supports_dbt: - pytest.skip("dbt is not supported for this destination configuration") - # provide chess url via environ os.environ["CHESS_URL"] = "https://api.chess.com/pub/" diff --git a/tests/load/utils.py b/tests/load/utils.py index 207e32209f..5fb706985d 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -152,6 +152,7 @@ def destinations_configs( subset: Sequence[str] = (), exclude: Sequence[str] = (), file_format: Optional[TLoaderFileFormat] = None, + supports_dbt: Optional[bool] = None, ) -> List[DestinationTestConfiguration]: # sanity check for item in subset: @@ -165,7 +166,7 @@ def destinations_configs( destination_configs += [ DestinationTestConfiguration(destination=destination) for destination in SQL_DESTINATIONS - if destination not in ("athena", "synapse") + if destination not in ("athena", "mssql", "synapse") ] destination_configs += [ DestinationTestConfiguration(destination="duckdb", file_format="parquet") @@ -192,9 +193,9 @@ def destinations_configs( extra_info="iceberg", ) ] - # dbt for Synapse has some complications and I couldn't get it to pass all tests. destination_configs += [ - DestinationTestConfiguration(destination="synapse", supports_dbt=False) + DestinationTestConfiguration(destination="mssql", supports_dbt=False), + DestinationTestConfiguration(destination="synapse", supports_dbt=False), ] if default_vector_configs: @@ -347,6 +348,10 @@ def destinations_configs( destination_configs = [ conf for conf in destination_configs if conf.file_format == file_format ] + if supports_dbt is not None: + destination_configs = [ + conf for conf in destination_configs if conf.supports_dbt == supports_dbt + ] # filter out excluded configs destination_configs = [ From e1e9bb38c48df79b8467ef68ae9f93a781c301b1 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 29 Jan 2024 00:27:38 +0100 Subject: [PATCH 14/23] replace CTAS with CREATE TABLE to eliminate concurrency issues --- .../impl/synapse/configuration.py | 49 ------------------- dlt/destinations/impl/synapse/factory.py | 5 +- dlt/destinations/impl/synapse/synapse.py | 20 ++++---- dlt/pipeline/pipeline.py | 4 -- .../load/pipeline/test_replace_disposition.py | 4 -- 5 files changed, 12 insertions(+), 70 deletions(-) diff --git a/dlt/destinations/impl/synapse/configuration.py b/dlt/destinations/impl/synapse/configuration.py index cc0e40114b..bb1ba632dc 100644 --- a/dlt/destinations/impl/synapse/configuration.py +++ b/dlt/destinations/impl/synapse/configuration.py @@ -53,60 +53,11 @@ class SynapseClientConfiguration(MsSqlClientConfiguration): create_indexes: bool = False """Whether `primary_key` and `unique` column hints are applied.""" - # Concurrency is disabled by overriding the configured number of workers to 1 at runtime. 
- auto_disable_concurrency: bool = True - """Whether concurrency is automatically disabled in cases where it might cause issues.""" - staging_use_msi: bool = False """Whether the managed identity of the Synapse workspace is used to authorize access to the staging Storage Account.""" __config_gen_annotations__: ClassVar[List[str]] = [ "default_table_index_type", "create_indexes", - "auto_disable_concurrency", "staging_use_msi", ] - - def get_load_workers(self, tables: TSchemaTables, workers: int) -> int: - """Returns the adjusted number of load workers to prevent concurrency issues.""" - - write_dispositions = [get_write_disposition(tables, table_name) for table_name in tables] - n_replace_dispositions = len([d for d in write_dispositions if d == "replace"]) - if ( - n_replace_dispositions > 1 - and self.replace_strategy == "staging-optimized" - and workers > 1 - ): - warning_msg_shared = ( - 'Data is being loaded into Synapse with write disposition "replace"' - ' and replace strategy "staging-optimized", while the number of' - f" load workers ({workers}) > 1. This configuration is problematic" - " in some cases, because Synapse does not always handle concurrency well" - " with the CTAS queries that are used behind the scenes to implement" - ' the "staging-optimized" strategy.' - ) - if self.auto_disable_concurrency: - logger.warning( - warning_msg_shared - + " The number of load workers will be automatically adjusted" - " and set to 1 to eliminate concurrency and prevent potential" - " issues. If you don't want this to happen, set the" - " DESTINATION__SYNAPSE__AUTO_DISABLE_CONCURRENCY environment" - ' variable to "false", or add the following to your config TOML:' - "\n\n[destination.synapse]\nauto_disable_concurrency = false\n" - ) - workers = 1 # adjust workers - else: - logger.warning( - warning_msg_shared - + " If you experience your pipeline gets stuck and doesn't finish," - " try reducing the number of load workers by exporting the LOAD__WORKERS" - " environment variable or by setting it in your config TOML:" - "\n\n[load]\nworkers = 1 # a value of 1 disables all concurrency," - " but perhaps a higher value also works\n\n" - "Alternatively, you can set the DESTINATION__SYNAPSE__AUTO_DISABLE_CONCURRENCY" - ' environment variable to "true", or add the following to your config TOML' - " to automatically disable concurrency where needed:" - "\n\n[destination.synapse]\nauto_disable_concurrency = true\n" - ) - return workers diff --git a/dlt/destinations/impl/synapse/factory.py b/dlt/destinations/impl/synapse/factory.py index 0ac58001ca..b7eddd6ef7 100644 --- a/dlt/destinations/impl/synapse/factory.py +++ b/dlt/destinations/impl/synapse/factory.py @@ -30,7 +30,6 @@ def __init__( credentials: t.Union[SynapseCredentials, t.Dict[str, t.Any], str] = None, default_table_index_type: t.Optional[TTableIndexType] = "heap", create_indexes: bool = False, - auto_disable_concurrency: bool = True, staging_use_msi: bool = False, destination_name: t.Optional[str] = None, environment: t.Optional[str] = None, @@ -45,15 +44,13 @@ def __init__( a connection string in the format `synapse://user:password@host:port/database` default_table_index_type: Maps directly to the default_table_index_type attribute of the SynapseClientConfiguration object. create_indexes: Maps directly to the create_indexes attribute of the SynapseClientConfiguration object. - auto_disable_concurrency: Maps directly to the auto_disable_concurrency attribute of the SynapseClientConfiguration object. 
- auto_disable_concurrency: Maps directly to the staging_use_msi attribute of the SynapseClientConfiguration object. + staging_use_msi: Maps directly to the staging_use_msi attribute of the SynapseClientConfiguration object. **kwargs: Additional arguments passed to the destination config """ super().__init__( credentials=credentials, default_table_index_type=default_table_index_type, create_indexes=create_indexes, - auto_disable_concurrency=auto_disable_concurrency, staging_use_msi=staging_use_msi, destination_name=destination_name, environment=environment, diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index eb6eae3f20..268ffad933 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -4,6 +4,8 @@ from textwrap import dedent from urllib.parse import urlparse, urlunparse +from dlt import current + from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( SupportsStagingDestination, @@ -181,15 +183,15 @@ def generate_sql( f" {staging_table_name};" ) # recreate staging table - # In some cases, when multiple instances of this CTAS query are - # executed concurrently, Synapse suspends the queries and hangs. - # This can be prevented by setting the env var LOAD__WORKERS = "1". - sql.append( - f"CREATE TABLE {staging_table_name}" - " WITH ( DISTRIBUTION = ROUND_ROBIN, HEAP )" # distribution must be explicitly specified with CTAS - f" AS SELECT * FROM {table_name}" - " WHERE 1 = 0;" # no data, table structure only - ) + job_client = current.pipeline().destination_client() # type: ignore[operator] + with job_client.with_staging_dataset(): + # get table columns from schema + columns = [c for c in job_client.schema.get_table_columns(table["name"]).values()] + # generate CREATE TABLE statement + create_table_stmt = job_client._get_table_update_sql( + table["name"], columns, generate_alter=False + ) + sql.extend(create_table_stmt) return sql diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 3a0a8f3931..73c8f076d1 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -483,10 +483,6 @@ def load( # make sure that destination is set and client is importable and can be instantiated client, staging_client = self._get_destination_clients(self.default_schema) - # for synapse we might need to adjust the number of load workers - if self.destination.destination_name == "synapse": - workers = client.config.get_load_workers(self.default_schema.tables, workers) # type: ignore[attr-defined] - # create default loader config and the loader load_config = LoaderConfiguration( workers=workers, diff --git a/tests/load/pipeline/test_replace_disposition.py b/tests/load/pipeline/test_replace_disposition.py index 65d3646f2d..c6db91efff 100644 --- a/tests/load/pipeline/test_replace_disposition.py +++ b/tests/load/pipeline/test_replace_disposition.py @@ -268,10 +268,6 @@ def test_replace_table_clearing( "test_replace_table_clearing", dataset_name="test_replace_table_clearing", full_refresh=True ) - if destination_config.destination == "synapse" and replace_strategy == "staging-optimized": - # this case requires load concurrency to be disabled (else the test gets stuck) - assert pipeline.destination_client().config.auto_disable_concurrency is True # type: ignore[attr-defined] - @dlt.resource(name="main_resource", write_disposition="replace", primary_key="id") def items_with_subitems(): data = { From 99a0718c74dadf51e4ff95db6d02b83cb5d64797 
Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 29 Jan 2024 00:30:30 +0100 Subject: [PATCH 15/23] change test config type to reduce unnecessary tests --- tests/load/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/load/utils.py b/tests/load/utils.py index 5fb706985d..ea4e2916cc 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -264,14 +264,6 @@ def destinations_configs( bucket_url=AZ_BUCKET, extra_info="az-authorization", ), - DestinationTestConfiguration( - destination="synapse", - staging="filesystem", - file_format="parquet", - bucket_url=AZ_BUCKET, - staging_use_msi=True, - extra_info="az-managed-identity", - ), ] if all_staging_configs: @@ -304,6 +296,14 @@ def destinations_configs( bucket_url=GCS_BUCKET, extra_info="gcs-authorization", ), + DestinationTestConfiguration( + destination="synapse", + staging="filesystem", + file_format="parquet", + bucket_url=AZ_BUCKET, + staging_use_msi=True, + extra_info="az-managed-identity", + ), ] # add local filesystem destinations if requested From 6d14d576a1c3c56e2d72646678cf2655eb929f07 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 29 Jan 2024 00:48:34 +0100 Subject: [PATCH 16/23] remove trailing whitespace --- tests/load/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/load/utils.py b/tests/load/utils.py index ea4e2916cc..805925ec6a 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -303,7 +303,7 @@ def destinations_configs( bucket_url=AZ_BUCKET, staging_use_msi=True, extra_info="az-managed-identity", - ), + ), ] # add local filesystem destinations if requested From b87dd1b744bc4c9fe3f2b6ac1cbea08c58296eb5 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 29 Jan 2024 16:08:22 +0100 Subject: [PATCH 17/23] refine staging table indexing --- dlt/destinations/impl/synapse/synapse.py | 32 +++++++++++++++---- .../dlt-ecosystem/destinations/synapse.md | 3 +- .../synapse/test_synapse_table_indexing.py | 11 ++++++- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/dlt/destinations/impl/synapse/synapse.py b/dlt/destinations/impl/synapse/synapse.py index 268ffad933..33e6194602 100644 --- a/dlt/destinations/impl/synapse/synapse.py +++ b/dlt/destinations/impl/synapse/synapse.py @@ -70,12 +70,22 @@ def __init__(self, schema: Schema, config: SynapseClientConfiguration) -> None: def _get_table_update_sql( self, table_name: str, new_columns: Sequence[TColumnSchema], generate_alter: bool ) -> List[str]: - table = self.get_load_table(table_name) + table = self.get_load_table(table_name, staging=self.in_staging_mode) if table is None: table_index_type = self.config.default_table_index_type else: table_index_type = cast(TTableIndexType, table.get(TABLE_INDEX_TYPE_HINT)) - if table_index_type == "clustered_columnstore_index": + if self.in_staging_mode: + final_table = self.get_load_table(table_name, staging=False) + final_table_index_type = cast( + TTableIndexType, final_table.get(TABLE_INDEX_TYPE_HINT) + ) + else: + final_table_index_type = table_index_type + if final_table_index_type == "clustered_columnstore_index": + # Even if the staging table has index type "heap", we still adjust + # the column data types to prevent errors when writing into the + # final table that has index type "clustered_columnstore_index". 
new_columns = self._get_columstore_valid_columns(new_columns) _sql_result = SqlJobClientBase._get_table_update_sql( @@ -129,12 +139,20 @@ def get_load_table(self, table_name: str, staging: bool = False) -> TTableSchema table = super().get_load_table(table_name, staging) if table is None: return None - if table_name in self.schema.dlt_table_names(): - # dlt tables should always be heap tables, regardless of the user - # configuration. Why? "For small lookup tables, less than 60 million rows, - # consider using HEAP or clustered index for faster query performance." - # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables + if staging and self.config.replace_strategy == "insert-from-staging": + # Staging tables should always be heap tables, because "when you are + # temporarily landing data in dedicated SQL pool, you may find that + # using a heap table makes the overall process faster." + # "staging-optimized" is not included, because in that strategy the + # staging table becomes the final table, so we should already create + # it with the desired index type. + table[TABLE_INDEX_TYPE_HINT] = "heap" # type: ignore[typeddict-unknown-key] + elif table_name in self.schema.dlt_table_names(): + # dlt tables should always be heap tables, because "for small lookup + # tables, less than 60 million rows, consider using HEAP or clustered + # index for faster query performance." table[TABLE_INDEX_TYPE_HINT] = "heap" # type: ignore[typeddict-unknown-key] + # https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables elif table_name in self.schema.data_table_names(): if TABLE_INDEX_TYPE_HINT not in table: # If present in parent table, fetch hint from there. diff --git a/docs/website/docs/dlt-ecosystem/destinations/synapse.md b/docs/website/docs/dlt-ecosystem/destinations/synapse.md index 4d66714ce3..dcfd92b9fb 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/synapse.md +++ b/docs/website/docs/dlt-ecosystem/destinations/synapse.md @@ -135,8 +135,9 @@ Possible values: >* **Set `default_table_index_type` to `"clustered_columnstore_index"` if you want to change the default** (see [additional destination options](#additional-destination-options)). >* **CLUSTERED COLUMNSTORE INDEX tables do not support the `varchar(max)`, `nvarchar(max)`, and `varbinary(max)` data types.** If you don't specify the `precision` for columns that map to any of these types, `dlt` will use the maximum lengths `varchar(4000)`, `nvarchar(4000)`, and `varbinary(8000)`. >* **While Synapse creates CLUSTERED COLUMNSTORE INDEXES by default, `dlt` creates HEAP tables by default.** HEAP is a more robust choice, because it supports all data types and doesn't require conversions. ->* **When using the `staging-optimized` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are always created as HEAP tables**—any configuration of the table index types is ignored. The HEAP strategy makes sense +>* **When using the `insert-from-staging` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are always created as HEAP tables**—any configuration of the table index types is ignored. The HEAP strategy makes sense for staging tables for reasons explained [here](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables). 
+>* **When using the `staging-optimized` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are already created with the configured table index type**, because the staging table becomes the final table. >* **`dlt` system tables are always created as HEAP tables, regardless of any configuration.** This is in line with Microsoft's recommendation that "for small lookup tables, less than 60 million rows, consider using HEAP or clustered index for faster query performance." >* Child tables, if any, inherent the table index type of their parent table. diff --git a/tests/load/synapse/test_synapse_table_indexing.py b/tests/load/synapse/test_synapse_table_indexing.py index e87b83fa3f..df90933de4 100644 --- a/tests/load/synapse/test_synapse_table_indexing.py +++ b/tests/load/synapse/test_synapse_table_indexing.py @@ -98,13 +98,22 @@ def items_with_table_index_type_specified() -> Iterator[Any]: @pytest.mark.parametrize( "table_index_type,column_schema", TABLE_INDEX_TYPE_COLUMN_SCHEMA_PARAM_GRID ) +@pytest.mark.parametrize( + # Also test staging replace strategies, to make sure the final table index + # type is not affected by staging table index type adjustments. + "replace_strategy", + ["insert-from-staging", "staging-optimized"], +) def test_resource_table_index_type_configuration( table_index_type: TTableIndexType, column_schema: Union[List[TColumnSchema], None], + replace_strategy: str, ) -> None: + os.environ["DESTINATION__REPLACE_STRATEGY"] = replace_strategy + @dlt.resource( name="items_with_table_index_type_specified", - write_disposition="append", + write_disposition="replace", columns=column_schema, ) def items_with_table_index_type_specified() -> Iterator[Any]: From 1c817bddb475385205b3f332364199c76292c2c8 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Tue, 30 Jan 2024 15:09:35 +0100 Subject: [PATCH 18/23] use generic statement to prevent repeating info --- docs/website/docs/general-usage/full-loading.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/website/docs/general-usage/full-loading.md b/docs/website/docs/general-usage/full-loading.md index 92fdf064fd..4651d156f0 100644 --- a/docs/website/docs/general-usage/full-loading.md +++ b/docs/website/docs/general-usage/full-loading.md @@ -67,6 +67,4 @@ opportunities, you should use this strategy. The `staging-optimized` strategy be recreated with a [clone command](https://docs.snowflake.com/en/sql-reference/sql/create-clone) from the staging tables. This is a low cost and fast way to create a second independent table from the data of another. Learn more about [table cloning on snowflake](https://docs.snowflake.com/en/user-guide/object-clone). -For all other destinations the `staging-optimized` will fall back to the behavior of the `insert-from-staging` strategy. - - +For all other [destinations](../dlt-ecosystem/destinations/index.md), please look at their respective documentation pages to see if and how the `staging-optimized` strategy is implemented. If it is not implemented, `dlt` will fall back to the `insert-from-staging` strategy. 
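To tie the replace strategy and the indexing options together, a small sketch of how they could be set from Python before running a pipeline, mirroring the environment-variable pattern used in the test above (the concrete values and names are illustrative):

```python
import os
import dlt

# how write_disposition="replace" is executed (see the full loading docs)
os.environ["DESTINATION__REPLACE_STRATEGY"] = "staging-optimized"
# default index type for tables that carry no explicit "x-table-index-type" hint
os.environ["DESTINATION__SYNAPSE__DEFAULT_TABLE_INDEX_TYPE"] = "clustered_columnstore_index"

pipeline = dlt.pipeline(
    pipeline_name="replace_strategy_demo",  # illustrative name
    destination="synapse",
    dataset_name="replace_strategy_demo",
)
```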
From 2dd979eb8d6a79334c5e624c4d68a73e86dd5d5d Mon Sep 17 00:00:00 2001
From: Jorrit Sandbrink
Date: Thu, 1 Feb 2024 20:13:09 +0100
Subject: [PATCH 19/23] remove outdated documentation

---
 docs/website/docs/dlt-ecosystem/destinations/synapse.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/docs/website/docs/dlt-ecosystem/destinations/synapse.md b/docs/website/docs/dlt-ecosystem/destinations/synapse.md
index dcfd92b9fb..8c1a7b29bc 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/synapse.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/synapse.md
@@ -150,9 +150,6 @@ Synapse supports the following [column hints](https://dlthub.com/docs/general-us

 > ❗ These hints are **disabled by default**. This is because the `PRIMARY KEY` and `UNIQUE` [constraints](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints) are tricky in Synapse: they are **not enforced** and can lead to inaccurate results if the user does not ensure all column values are unique. For the column hints to take effect, the `create_indexes` configuration needs to be set to `True`, see [additional destination options](#additional-destination-options).

-## Load concurrency issue
-`dlt` uses threading to enable concurrent processing and [parallel loading](../../reference/performance.md#load). Concurrency does not work properly in all cases when using the `staging-optimized` [`replace` strategy](../../general-usage/full-loading.md), because Synapse suspends the CTAS queries that `dlt` uses behind the scenes and gets stuck. To prevent this from happening, `dlt` automatically sets the number of load workers to 1 to disable concurrency when replacing data using the `staging-optimized` strategy. Set `auto_disable_concurrency = "false"` if you don't want this to happen (see [additional destination options](#additional-destination-options))
-
 ## Staging support
 Synapse supports Azure Blob Storage (both standard and [ADLS Gen2](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction)) as a file staging destination. `dlt` first uploads Parquet files to the blob container, and then instructs Synapse to read the Parquet file and load its data into a Synapse table using the [COPY INTO](https://learn.microsoft.com/en-us/sql/t-sql/statements/copy-into-transact-sql) statement.

@@ -178,7 +175,6 @@ The following settings can optionally be configured:
 [destination.synapse]
 default_table_index_type = "heap"
 create_indexes = "false"
-auto_disable_concurrency = "true"
 staging_use_msi = "false"

 [destination.synapse.credentials]
@@ -196,7 +192,6 @@ destination.synapse.credentials = "synapse://loader:your_loader_password@your_sy
 Descriptions:
 - `default_table_index_type` sets the [table index type](#table-index-type) that is used if no table index type is specified on the resource.
 - `create_indexes` determines if `primary_key` and `unique` [column hints](#supported-column-hints) are applied.
-- `auto_disable_concurrency` determines if concurrency is automatically disabled in cases where it might cause issues.
 - `staging_use_msi` determines if the Managed Identity of the Synapse workspace is used to authorize access to the [staging](#staging-support) Storage Account. Ensure the Managed Identity has the [Storage Blob Data Reader](https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles#storage-blob-data-reader) role (or a higher-privileged role) assigned on the blob container if you set this option to `"true"`.
- `port` used for the ODBC connection. - `connect_timeout` sets the timeout for the `pyodbc` connection attempt, in seconds. From da5cdac7f8cce1c59a36322c2a3bdc8735591f6e Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 1 Feb 2024 20:23:14 +0100 Subject: [PATCH 20/23] add synapse destination to sidebar --- docs/website/sidebars.js | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index 2c9b55e6da..f92f43564a 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -87,6 +87,7 @@ const sidebars = { 'dlt-ecosystem/destinations/bigquery', 'dlt-ecosystem/destinations/duckdb', 'dlt-ecosystem/destinations/mssql', + 'dlt-ecosystem/destinations/synapse', 'dlt-ecosystem/destinations/filesystem', 'dlt-ecosystem/destinations/postgres', 'dlt-ecosystem/destinations/redshift', From d7d9e35cf49691b2e877d9e4b905b9db3e77de67 Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Thu, 1 Feb 2024 23:17:53 +0100 Subject: [PATCH 21/23] add support for additional table hints --- dlt/destinations/impl/synapse/synapse_adapter.py | 6 ++++-- dlt/extract/hints.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/dlt/destinations/impl/synapse/synapse_adapter.py b/dlt/destinations/impl/synapse/synapse_adapter.py index f135dd967a..24932736f9 100644 --- a/dlt/destinations/impl/synapse/synapse_adapter.py +++ b/dlt/destinations/impl/synapse/synapse_adapter.py @@ -1,4 +1,4 @@ -from typing import Any, Literal, Set, get_args, Final +from typing import Any, Literal, Set, get_args, Final, Dict from dlt.extract import DltResource, resource as make_resource from dlt.extract.typing import TTableHintTemplate @@ -39,6 +39,7 @@ def synapse_adapter(data: Any, table_index_type: TTableIndexType = None) -> DltR """ resource = ensure_resource(data) + additional_table_hints: Dict[str, TTableHintTemplate[Any]] = {} if table_index_type is not None: if table_index_type not in TABLE_INDEX_TYPES: allowed_types = ", ".join(TABLE_INDEX_TYPES) @@ -46,5 +47,6 @@ def synapse_adapter(data: Any, table_index_type: TTableIndexType = None) -> DltR f"Table index type {table_index_type} is invalid. Allowed table index" f" types are: {allowed_types}." ) - resource._hints[TABLE_INDEX_TYPE_HINT] = table_index_type # type: ignore[typeddict-unknown-key] + additional_table_hints[TABLE_INDEX_TYPE_HINT] = table_index_type + resource.apply_hints(additional_table_hints=additional_table_hints) return resource diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index 437dbbc6bd..e483f035fc 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -1,5 +1,5 @@ from copy import copy, deepcopy -from typing import List, TypedDict, cast, Any +from typing import List, TypedDict, cast, Any, Optional, Dict from dlt.common.schema.utils import DEFAULT_WRITE_DISPOSITION, merge_columns, new_column, new_table from dlt.common.schema.typing import ( @@ -125,6 +125,7 @@ def apply_hints( merge_key: TTableHintTemplate[TColumnNames] = None, incremental: Incremental[Any] = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, + additional_table_hints: Optional[Dict[str, TTableHintTemplate[Any]]] = None, ) -> None: """Creates or modifies existing table schema by setting provided hints. Accepts both static and dynamic hints based on data. 
@@ -208,6 +209,14 @@ def apply_hints( t["incremental"] = None else: t["incremental"] = incremental + if additional_table_hints is not None: + # loop through provided hints and add, overwrite, or remove them + for k, v in additional_table_hints.items(): + if v: + t[k] = v # type: ignore[literal-required] + else: + t.pop(k, None) # type: ignore[misc] + self.set_hints(t) def set_hints(self, hints_template: TResourceHints) -> None: From bab216d8e09bc2012cceb419ed48ccfe9ac0d0ef Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 5 Feb 2024 14:45:39 +0100 Subject: [PATCH 22/23] correct content-hash after merge conflict resolution --- poetry.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5ea4d19f2b..915152e0c2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "about-time" @@ -8749,4 +8749,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "61fa24ff52200b5bf97906a376826f00350abc8f6810fb2fcea73abaf245437f" +content-hash = "7b829a75b59316147385e16456395bebf2155e68cdeac3f9fa70523c3c33924a" From c3efe33d8c71469de77c54e6b4ec44758185da2e Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink Date: Mon, 5 Feb 2024 14:47:14 +0100 Subject: [PATCH 23/23] only remove hint if it is None, not if it is empty --- dlt/extract/hints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index e483f035fc..c1a39041d8 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -212,7 +212,7 @@ def apply_hints( if additional_table_hints is not None: # loop through provided hints and add, overwrite, or remove them for k, v in additional_table_hints.items(): - if v: + if v is not None: t[k] = v # type: ignore[literal-required] else: t.pop(k, None) # type: ignore[misc]
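The effect of the fix above (`if v is not None` instead of `if v`) is easiest to see with a small sketch of `apply_hints` using the new `additional_table_hints` argument. The hint key below is purely illustrative; destination adapters such as `synapse_adapter` use the same mechanism with their own reserved keys.

```python
from typing import Any, Dict, Iterator

import dlt


@dlt.resource(name="docs")  # illustrative resource
def docs() -> Iterator[Dict[str, Any]]:
    yield {"id": 1}


r = docs()

# Add or overwrite a custom hint on the resource's table template.
r.apply_hints(additional_table_hints={"x-example-hint": "some-value"})

# After the fix, falsy-but-not-None values such as False or "" are kept ...
r.apply_hints(additional_table_hints={"x-example-hint": False})

# ... whereas None removes the hint from the table template again.
r.apply_hints(additional_table_hints={"x-example-hint": None})
```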
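Tying the adapter from patch 21 back to the table index types documented earlier, a usage sketch follows. Again, this is an illustration under assumptions rather than part of the patches: the resource and dataset names are invented, and the import path simply mirrors the module added in this series (a released version may also re-export the adapter elsewhere).

```python
from typing import Any, Dict, Iterator

import dlt
from dlt.destinations.impl.synapse.synapse_adapter import synapse_adapter


@dlt.resource(name="events")  # illustrative resource
def events() -> Iterator[Dict[str, Any]]:
    yield {"id": 1, "payload": "a"}


# synapse_adapter() validates the value against TABLE_INDEX_TYPES and stores it
# via resource.apply_hints(additional_table_hints={TABLE_INDEX_TYPE_HINT: ...}).
resource = synapse_adapter(events(), table_index_type="clustered_columnstore_index")

pipeline = dlt.pipeline(destination="synapse", dataset_name="events_dataset")  # invented dataset name
pipeline.run(resource)
```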