From 09914a381331c4a89903acc167c7ab847aebf3fd Mon Sep 17 00:00:00 2001 From: Marcel Coetzee <34739235+Pipboyguy@users.noreply.github.com> Date: Sat, 30 Nov 2024 21:26:41 +0200 Subject: [PATCH 1/4] Support Spatial Types for PostGIS (#1927) * Add dependencies Signed-off-by: Marcel Coetzee * Add shapely dependency Signed-off-by: Marcel Coetzee * Move sample geodata to correct folder Signed-off-by: Marcel Coetzee * Make smaller Signed-off-by: Marcel Coetzee * Enhance PostgresTableBuilder test suite with geometry type handling. Signed-off-by: Marcel Coetzee * Add tests Signed-off-by: Marcel Coetzee * Add geometry columns with default SRID 4326. Signed-off-by: Marcel Coetzee * resource can't serialize shapely objects Signed-off-by: Marcel Coetzee * Expand geom test Signed-off-by: Marcel Coetzee * Comments Signed-off-by: Marcel Coetzee * Update lock file Signed-off-by: Marcel Coetzee * schema Signed-off-by: Marcel Coetzee * [fix](database): remove unused hex validation method Signed-off-by: Marcel Coetzee * Create custom insert job for geom types Signed-off-by: Marcel Coetzee * Remove hanging client parameter Signed-off-by: Marcel Coetzee * Add a TODO comment to address the issue in the splitting logic Signed-off-by: Marcel Coetzee * Remove unnecessary init override Signed-off-by: Marcel Coetzee * Add debugging points Signed-off-by: Marcel Coetzee * [test](database): add tests for geometry parsing in Postgres Signed-off-by: Marcel Coetzee * Correct row parsing in Postgres destination Signed-off-by: Marcel Coetzee * Yield from supermethod Signed-off-by: Marcel Coetzee * Add control flow for geom Signed-off-by: Marcel Coetzee * Add test * refactor geo parsing Signed-off-by: Marcel Coetzee * [fix](test): correct schema name in PostGIS geometry test Signed-off-by: Marcel Coetzee * Remove stale test Signed-off-by: Marcel Coetzee * Remove geopandas test until resolution Signed-off-by: Marcel Coetzee * Add docs and raise on malformed values Signed-off-by: Marcel Coetzee * Add postgis dependency to ci Signed-off-by: Marcel Coetzee * fix postgis image repo Signed-off-by: Marcel Coetzee * Add postgis to dbt runner Signed-off-by: Marcel Coetzee * Change snippet to py instead of python Signed-off-by: Marcel Coetzee * add postgis Signed-off-by: Marcel Coetzee * Remove unused geodata file * Remove unnecessary INSERT class Signed-off-by: Marcel Coetzee * Add WKB format handling Signed-off-by: Marcel Coetzee * Packaging Signed-off-by: Marcel Coetzee * Move import to local Signed-off-by: Marcel Coetzee * Comment Signed-off-by: Marcel Coetzee * postgis docs Signed-off-by: Marcel Coetzee * Update lockfile Signed-off-by: Marcel Coetzee * fix(deps): remove shapely dependency from postgis extra Signed-off-by: Marcel Coetzee * format Signed-off-by: Marcel Coetzee * feat(postgres): add support for CSV loading of geometry columns Signed-off-by: Marcel Coetzee * Remove wkb examples in docs Signed-off-by: Marcel Coetzee * format Signed-off-by: Marcel Coetzee --------- Signed-off-by: Marcel Coetzee --- .github/workflows/test_dbt_runner.yml | 2 +- .github/workflows/test_destinations.yml | 2 +- .github/workflows/test_local_destinations.yml | 4 +- .github/workflows/test_local_sources.yml | 4 +- dlt/common/libs/pyarrow.py | 9 +- dlt/destinations/impl/postgres/factory.py | 22 +- dlt/destinations/impl/postgres/postgres.py | 54 ++-- .../impl/postgres/postgres_adapter.py | 63 +++++ .../dlt-ecosystem/destinations/postgres.md | 52 +++- poetry.lock | 100 ++++++-- pyproject.toml | 2 + .../postgres/test_postgres_table_builder.py | 239 
+++++++++++++++++- tests/load/postgres/utils.py | 68 +++++ 13 files changed, 547 insertions(+), 74 deletions(-) create mode 100644 dlt/destinations/impl/postgres/postgres_adapter.py create mode 100644 tests/load/postgres/utils.py diff --git a/.github/workflows/test_dbt_runner.yml b/.github/workflows/test_dbt_runner.yml index 13810fbc0d..ad29909d9a 100644 --- a/.github/workflows/test_dbt_runner.yml +++ b/.github/workflows/test_dbt_runner.yml @@ -60,7 +60,7 @@ jobs: - name: Install dependencies # install dlt with postgres support - run: poetry install --no-interaction -E postgres --with sentry-sdk,dbt + run: poetry install --no-interaction -E postgres -E postgis --with sentry-sdk,dbt - name: create secrets.toml run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml diff --git a/.github/workflows/test_destinations.yml b/.github/workflows/test_destinations.yml index df398e13ad..933248d994 100644 --- a/.github/workflows/test_destinations.yml +++ b/.github/workflows/test_destinations.yml @@ -78,7 +78,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E redshift -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline -E deltalake + run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline -E deltalake - name: create secrets.toml run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index 61bfe1551a..4947a46a3b 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -48,7 +48,7 @@ jobs: # Label used to access the service container postgres: # Docker Hub image - image: postgres + image: postgis/postgis # Provide the password for postgres env: POSTGRES_DB: dlt_data @@ -95,7 +95,7 @@ jobs: key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations - name: Install dependencies - run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline -E deltalake + run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline -E deltalake - name: Start SFTP server run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d diff --git a/.github/workflows/test_local_sources.yml b/.github/workflows/test_local_sources.yml index 8a3ba2a670..39689f5c85 100644 --- a/.github/workflows/test_local_sources.yml +++ b/.github/workflows/test_local_sources.yml @@ -43,7 +43,7 @@ jobs: # Label used to access the service container postgres: # Docker Hub image - image: postgres + image: postgis/postgis # Provide the password for postgres env: POSTGRES_DB: dlt_data @@ -83,7 +83,7 @@ jobs: # TODO: which deps should we enable? 
- name: Install dependencies - run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E sql_database --with sentry-sdk,pipeline,sources + run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E sql_database --with sentry-sdk,pipeline,sources # run sources tests in load against configured destinations - run: poetry run pytest tests/load/sources diff --git a/dlt/common/libs/pyarrow.py b/dlt/common/libs/pyarrow.py index 37268c0d2f..029cd75399 100644 --- a/dlt/common/libs/pyarrow.py +++ b/dlt/common/libs/pyarrow.py @@ -628,7 +628,14 @@ def row_tuples_to_arrow( " extracting an SQL VIEW that selects with cast." ) json_str_array = pa.array( - [None if s is None else json.dumps(s) if not issubclass(type(s), set) else json.dumps(list(s)) for s in columnar_known_types[field.name]] + [ + ( + None + if s is None + else json.dumps(s) if not issubclass(type(s), set) else json.dumps(list(s)) + ) + for s in columnar_known_types[field.name] + ] ) columnar_known_types[field.name] = json_str_array diff --git a/dlt/destinations/impl/postgres/factory.py b/dlt/destinations/impl/postgres/factory.py index bde0e35f3d..e0dc2836eb 100644 --- a/dlt/destinations/impl/postgres/factory.py +++ b/dlt/destinations/impl/postgres/factory.py @@ -1,19 +1,19 @@ import typing as t +from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE from dlt.common.data_writers.configuration import CsvFormatConfiguration -from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.common.data_writers.escape import escape_postgres_identifier, escape_postgres_literal -from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE +from dlt.common.destination import Destination, DestinationCapabilitiesContext from dlt.common.destination.typing import PreparedTableSchema from dlt.common.exceptions import TerminalValueError from dlt.common.schema.typing import TColumnSchema, TColumnType from dlt.common.wei import EVM_DECIMAL_PRECISION - -from dlt.destinations.type_mapping import TypeMapperImpl from dlt.destinations.impl.postgres.configuration import ( PostgresCredentials, PostgresClientConfiguration, ) +from dlt.destinations.impl.postgres.postgres_adapter import GEOMETRY_HINT, SRID_HINT +from dlt.destinations.type_mapping import TypeMapperImpl if t.TYPE_CHECKING: from dlt.destinations.impl.postgres.postgres import PostgresClient @@ -55,6 +55,7 @@ class PostgresTypeMapper(TypeMapperImpl): "character varying": "text", "smallint": "bigint", "integer": "bigint", + "geometry": "text", } def to_db_integer_type(self, column: TColumnSchema, table: PreparedTableSchema = None) -> str: @@ -108,11 +109,18 @@ def to_db_datetime_type( def from_destination_type( self, db_type: str, precision: t.Optional[int] = None, scale: t.Optional[int] = None ) -> TColumnType: - if db_type == "numeric": - if (precision, scale) == self.capabilities.wei_precision: - return dict(data_type="wei") + if db_type == "numeric" and (precision, scale) == self.capabilities.wei_precision: + return dict(data_type="wei") + if db_type.startswith("geometry"): + return dict(data_type="text") return super().from_destination_type(db_type, precision, scale) + def to_destination_type(self, column: TColumnSchema, table: PreparedTableSchema) -> str: + if column.get(GEOMETRY_HINT): + srid = column.get(SRID_HINT, 4326) + return f"geometry(Geometry, {srid})" + return super().to_destination_type(column, table) + class 
postgres(Destination[PostgresClientConfiguration, "PostgresClient"]): spec = PostgresClientConfiguration diff --git a/dlt/destinations/impl/postgres/postgres.py b/dlt/destinations/impl/postgres/postgres.py index 682f70da04..2459ee1dbe 100644 --- a/dlt/destinations/impl/postgres/postgres.py +++ b/dlt/destinations/impl/postgres/postgres.py @@ -2,9 +2,9 @@ from dlt.common import logger from dlt.common.data_writers.configuration import CsvFormatConfiguration +from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.exceptions import ( DestinationInvalidFileFormat, - DestinationTerminalException, ) from dlt.common.destination.reference import ( HasFollowupJobs, @@ -12,20 +12,16 @@ RunnableLoadJob, FollowupJobRequest, LoadJob, - TLoadJobState, ) -from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.exceptions import TerminalValueError from dlt.common.schema import TColumnSchema, TColumnHint, Schema -from dlt.common.schema.typing import TColumnType, TTableFormat +from dlt.common.schema.typing import TColumnType from dlt.common.schema.utils import is_nullable_column from dlt.common.storages.file_storage import FileStorage - -from dlt.destinations.sql_jobs import SqlStagingCopyFollowupJob, SqlJobParams -from dlt.destinations.insert_job_client import InsertValuesJobClient -from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient from dlt.destinations.impl.postgres.configuration import PostgresClientConfiguration +from dlt.destinations.impl.postgres.sql_client import Psycopg2SqlClient +from dlt.destinations.insert_job_client import InsertValuesJobClient from dlt.destinations.sql_client import SqlClientBase +from dlt.destinations.sql_jobs import SqlStagingCopyFollowupJob, SqlJobParams HINT_TO_POSTGRES_ATTR: Dict[TColumnHint, str] = {"unique": "UNIQUE"} @@ -43,15 +39,16 @@ def generate_sql( with sql_client.with_staging_dataset(): staging_table_name = sql_client.make_qualified_table_name(table["name"]) table_name = sql_client.make_qualified_table_name(table["name"]) - # drop destination table - sql.append(f"DROP TABLE IF EXISTS {table_name};") - # moving staging table to destination schema - sql.append( - f"ALTER TABLE {staging_table_name} SET SCHEMA" - f" {sql_client.fully_qualified_dataset_name()};" + sql.extend( + ( + f"DROP TABLE IF EXISTS {table_name};", + ( + f"ALTER TABLE {staging_table_name} SET SCHEMA" + f" {sql_client.fully_qualified_dataset_name()};" + ), + f"CREATE TABLE {staging_table_name} (like {table_name} including all);", + ) ) - # recreate staging table - sql.append(f"CREATE TABLE {staging_table_name} (like {table_name} including all);") return sql @@ -111,8 +108,7 @@ def run(self) -> None: split_columns.append(norm_col) if norm_col in split_headers and is_nullable_column(col): split_null_headers.append(norm_col) - split_unknown_headers = set(split_headers).difference(split_columns) - if split_unknown_headers: + if split_unknown_headers := set(split_headers).difference(split_columns): raise DestinationInvalidFileFormat( "postgres", "csv", @@ -130,15 +126,8 @@ def run(self) -> None: qualified_table_name = sql_client.make_qualified_table_name(table_name) copy_sql = ( - "COPY %s (%s) FROM STDIN WITH (FORMAT CSV, DELIMITER '%s', NULL ''," - " %s ENCODING '%s')" - % ( - qualified_table_name, - headers, - sep, - null_headers, - csv_format.encoding, - ) + f"COPY {qualified_table_name} ({headers}) FROM STDIN WITH (FORMAT CSV, DELIMITER" + f" '{sep}', NULL '', {null_headers} ENCODING '{csv_format.encoding}')" ) 
with sql_client.begin_transaction(): with sql_client.native_connection.cursor() as cursor: @@ -173,15 +162,16 @@ def create_load_job( return job def _get_column_def_sql(self, c: TColumnSchema, table: PreparedTableSchema = None) -> str: - hints_str = " ".join( + hints_ = " ".join( self.active_hints.get(h, "") for h in self.active_hints.keys() if c.get(h, False) is True ) column_name = self.sql_client.escape_column_name(c["name"]) - return ( - f"{column_name} {self.type_mapper.to_destination_type(c,table)} {hints_str} {self._gen_not_null(c.get('nullable', True))}" - ) + nullability = self._gen_not_null(c.get("nullable", True)) + column_type = self.type_mapper.to_destination_type(c, table) + + return f"{column_name} {column_type} {hints_} {nullability}" def _create_replace_followup_jobs( self, table_chain: Sequence[PreparedTableSchema] diff --git a/dlt/destinations/impl/postgres/postgres_adapter.py b/dlt/destinations/impl/postgres/postgres_adapter.py new file mode 100644 index 0000000000..11e86ec525 --- /dev/null +++ b/dlt/destinations/impl/postgres/postgres_adapter.py @@ -0,0 +1,63 @@ +from typing import Any, Optional + +from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns +from dlt.destinations.utils import get_resource_for_adapter +from dlt.extract import DltResource + +GEOMETRY_HINT = "x-postgres-geometry" +SRID_HINT = "x-postgres-srid" + + +def postgres_adapter( + data: Any, + geometry: TColumnNames = None, + srid: Optional[int] = 4326, +) -> DltResource: + """Prepares data for the postgres destination by specifying which columns should + be cast to PostGIS geometry types. + + Args: + data (Any): The data to be transformed. It can be raw data or an instance + of DltResource. If raw data, the function wraps it into a DltResource + object. + geometry (TColumnNames, optional): Specify columns to cast to geometries. + It can be a single column name as a string, or a list of column names. + srid (int, optional): The Spatial Reference System Identifier (SRID) to be + used for the geometry columns. If not provided, SRID 4326 will be used. + + Returns: + DltResource: A resource with applied postgres-specific hints. + + Raises: + ValueError: If input for `geometry` is invalid, or if no geometry columns are specified. + + Examples: + >>> data = [{"town": "Null Island", "loc": "POINT(0 0)"}] + >>> postgres_adapter(data, geometry="loc", srid=4326) + [DltResource with hints applied] + """ + resource = get_resource_for_adapter(data) + + column_hints: TTableSchemaColumns = {} + + if geometry: + if isinstance(geometry, str): + geometry = [geometry] + if not isinstance(geometry, list): + raise ValueError( + "'geometry' must be a list of column names or a single column name as a string." 
+ ) + + for column_name in geometry: + column_hints[column_name] = { + "name": column_name, + GEOMETRY_HINT: True, # type: ignore[misc] + } + if srid is not None: + column_hints[column_name][SRID_HINT] = srid # type: ignore + + if not column_hints: + raise ValueError("A value for 'geometry' must be specified.") + else: + resource.apply_hints(columns=column_hints) + return resource diff --git a/docs/website/docs/dlt-ecosystem/destinations/postgres.md b/docs/website/docs/dlt-ecosystem/destinations/postgres.md index bb9aba9051..922b187a7e 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/postgres.md +++ b/docs/website/docs/dlt-ecosystem/destinations/postgres.md @@ -117,7 +117,57 @@ In the example above, `arrow_table` will be converted to CSV with **pyarrow** an ## Supported column hints `postgres` will create unique indexes for all columns with `unique` hints. This behavior **may be disabled**. -### Table and column identifiers +### Spatial types + +To enable GIS capabilities in your Postgres destination, use the `x-postgres-geometry` and `x-postgres-srid` hints for columns containing geometric data. +The `postgres_adapter` applies these hints conveniently, with a default SRID of `4326`. + +**Supported input formats:** + +- WKT (Well-Known Text) +- WKB (Well-Known Binary) as a hexadecimal string + +If you have geometry data in binary format, convert it to its hexadecimal representation before loading. + +**Example:** Using `postgres_adapter` with different geometry formats + +```py +from dlt.destinations.impl.postgres.postgres_adapter import postgres_adapter + +# Sample data with various geometry types +data_wkt = [ + {"type": "Point_wkt", "geom": "POINT (1 1)"}, + {"type": "Polygon_wkt", "geom": "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"}, + ] + +data_wkb_hex = [ + {"type": "Point_wkb_hex", "geom": "0101000000000000000000F03F000000000000F03F"}, + {"type": "LineString_wkb_hex", "geom": "01020000000300000000000000000000000000000000000000000000000000F03F000000000000F03F00000000000000400000000000000040"}, +] + + + +# Apply postgres_adapter to the 'geom' column with default SRID 4326 +resource_wkt = postgres_adapter(data_wkt, geometry="geom") +resource_wkb_hex = postgres_adapter(data_wkb_hex, geometry="geom") + +# If you need a different SRID +resource_wkt = postgres_adapter(data_wkt, geometry="geom", srid=3242) +``` + +Ensure that the PostGIS extension is enabled in your Postgres database: + +```sql +CREATE EXTENSION postgis; +``` + +This configuration allows `dlt` to map the `geom` column to the PostGIS `geometry` type for spatial queries and analyses. + +:::warning +The `LinearRing` geometry type isn't supported. +::: + +## Table and column identifiers Postgres supports both case-sensitive and case-insensitive identifiers. All unquoted and lowercase identifiers resolve case-insensitively in SQL statements. Case insensitive [naming conventions](../../general-usage/naming-convention.md#case-sensitive-and-insensitive-destinations) like the default **snake_case** will generate case-insensitive identifiers. Case sensitive (like **sql_cs_v1**) will generate case-sensitive identifiers that must be quoted in SQL statements. ## Additional destination options diff --git a/poetry.lock b/poetry.lock index 1bcff1de4a..9ae26bd04c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. 
[[package]] name = "about-time" @@ -13,13 +13,13 @@ files = [ [[package]] name = "adlfs" -version = "2024.7.0" +version = "2024.4.1" description = "Access Azure Datalake Gen1 with fsspec and dask" optional = true python-versions = ">=3.8" files = [ - {file = "adlfs-2024.7.0-py3-none-any.whl", hash = "sha256:2005c8e124fda3948f2a6abb2dbebb2c936d2d821acaca6afd61932edfa9bc07"}, - {file = "adlfs-2024.7.0.tar.gz", hash = "sha256:106995b91f0eb5e775bcd5957d180d9a14faef3271a063b1f65c66fd5ab05ddf"}, + {file = "adlfs-2024.4.1-py3-none-any.whl", hash = "sha256:acea94612ddacaa34ea8c6babcc95b8da6982f930cdade7a86fbd17382403e16"}, + {file = "adlfs-2024.4.1.tar.gz", hash = "sha256:75530a45447f358ae53c5c39c298b8d966dae684be84db899f63b94cd96fc000"}, ] [package.dependencies] @@ -4504,13 +4504,13 @@ files = [ [[package]] name = "ibis-framework" -version = "10.0.0.dev231" +version = "10.0.0.dev256" description = "The portable Python dataframe library" optional = true python-versions = "<4.0,>=3.10" files = [ - {file = "ibis_framework-10.0.0.dev231-py3-none-any.whl", hash = "sha256:8689cbcd55c3680bdb5fd51ff0d2a10260372c1b15661c123b0460087cfdbda2"}, - {file = "ibis_framework-10.0.0.dev231.tar.gz", hash = "sha256:199142243d1a6a0eba3bbbe0debba910fc8087dffe4eac9e3d61823f6988f421"}, + {file = "ibis_framework-10.0.0.dev256-py3-none-any.whl", hash = "sha256:d6f21278e6fd78920bbe986df2c871921142635cc4f7d5d2048cae26e307a3df"}, + {file = "ibis_framework-10.0.0.dev256.tar.gz", hash = "sha256:e9f97d8177fd88f4a3578be20519c1da79a6a7ffac678b46b790bfde67405930"}, ] [package.dependencies] @@ -4520,26 +4520,27 @@ db-dtypes = {version = ">=0.3,<2", optional = true, markers = "extra == \"bigque duckdb = {version = ">=0.10,<1.2", optional = true, markers = "extra == \"duckdb\""} google-cloud-bigquery = {version = ">=3,<4", optional = true, markers = "extra == \"bigquery\""} google-cloud-bigquery-storage = {version = ">=2,<3", optional = true, markers = "extra == \"bigquery\""} -numpy = {version = ">=1.23.2,<3", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} +numpy = {version = ">=1.23.2,<3", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"databricks\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} packaging = {version = ">=21.3,<25", optional = true, markers = "extra == \"duckdb\" or extra == \"oracle\" or extra == \"polars\" or extra == \"pyspark\""} -pandas = {version = ">=1.5.3,<3", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or 
extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} +pandas = {version = ">=1.5.3,<3", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"databricks\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} parsy = ">=2,<3" psycopg2 = {version = ">=2.8.4,<3", optional = true, markers = "extra == \"postgres\" or extra == \"risingwave\""} -pyarrow = {version = ">=10.0.1,<19", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} -pyarrow-hotfix = {version = ">=0.4,<1", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} +pyarrow = {version = ">=10.0.1,<19", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"databricks\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} +pyarrow-hotfix = {version = ">=0.4,<1", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"databricks\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} pydata-google-auth = {version = ">=1.4.0,<2", optional = true, markers = "extra == \"bigquery\""} pyodbc = {version = ">=4.0.39,<6", optional = true, markers = "extra == \"mssql\""} python-dateutil = ">=2.8.2,<3" pytz = ">=2022.7" -rich = {version = ">=12.4.4,<14", optional = true, markers = "extra == \"bigquery\" or extra == \"clickhouse\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} +rich = {version = ">=12.4.4,<14", optional = true, markers = "extra == 
\"bigquery\" or extra == \"clickhouse\" or extra == \"databricks\" or extra == \"datafusion\" or extra == \"druid\" or extra == \"duckdb\" or extra == \"exasol\" or extra == \"flink\" or extra == \"impala\" or extra == \"mssql\" or extra == \"mysql\" or extra == \"oracle\" or extra == \"polars\" or extra == \"postgres\" or extra == \"pyspark\" or extra == \"snowflake\" or extra == \"sqlite\" or extra == \"risingwave\" or extra == \"trino\""} snowflake-connector-python = {version = ">=3.0.2,<3.3.0b1 || >3.3.0b1,<4", optional = true, markers = "extra == \"snowflake\""} -sqlglot = ">=23.4,<25.29" +sqlglot = ">=23.4,<25.30" toolz = ">=0.11,<2" typing-extensions = ">=4.3.0,<5" [package.extras] bigquery = ["db-dtypes (>=0.3,<2)", "google-cloud-bigquery (>=3,<4)", "google-cloud-bigquery-storage (>=2,<3)", "numpy (>=1.23.2,<3)", "pandas (>=1.5.3,<3)", "pyarrow (>=10.0.1,<19)", "pyarrow-hotfix (>=0.4,<1)", "pydata-google-auth (>=1.4.0,<2)", "rich (>=12.4.4,<14)"] clickhouse = ["clickhouse-connect[arrow,numpy,pandas] (>=0.5.23,<1)", "numpy (>=1.23.2,<3)", "pandas (>=1.5.3,<3)", "pyarrow (>=10.0.1,<19)", "pyarrow-hotfix (>=0.4,<1)", "rich (>=12.4.4,<14)"] +databricks = ["databricks-sql-connector-core (>=4,<5)", "numpy (>=1.23.2,<3)", "pandas (>=1.5.3,<3)", "pyarrow (>=10.0.1,<19)", "pyarrow-hotfix (>=0.4,<1)", "rich (>=12.4.4,<14)"] datafusion = ["datafusion (>=0.6,<43)", "numpy (>=1.23.2,<3)", "pandas (>=1.5.3,<3)", "pyarrow (>=10.0.1,<19)", "pyarrow-hotfix (>=0.4,<1)", "rich (>=12.4.4,<14)"] decompiler = ["black (>=22.1.0,<25)"] deltalake = ["deltalake (>=0.9.0,<1)"] @@ -7453,18 +7454,18 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" [[package]] name = "pydata-google-auth" -version = "1.8.2" +version = "1.9.0" description = "PyData helpers for authenticating to Google APIs" optional = true -python-versions = "*" +python-versions = ">=3.9" files = [ - {file = "pydata-google-auth-1.8.2.tar.gz", hash = "sha256:547b6c0fbea657dcecd50887c5db8640ebec062a59a2b88e8ff8e53a04818303"}, - {file = "pydata_google_auth-1.8.2-py2.py3-none-any.whl", hash = "sha256:a9dce59af4a170ea60c4b2ebbc83ee1f74d34255a4f97b2469ae9a4a0dc98e99"}, + {file = "pydata-google-auth-1.9.0.tar.gz", hash = "sha256:2f546e88f007dfdb050087556eb46d6008e351386a7b368096797fae5df374f2"}, + {file = "pydata_google_auth-1.9.0-py2.py3-none-any.whl", hash = "sha256:e17a44ce8de5b48883667357c03595b85d80938bf1fb714d65bfac9a9f9c8add"}, ] [package.dependencies] -google-auth = {version = ">=1.25.0,<3.0dev", markers = "python_version >= \"3.6\""} -google-auth-oauthlib = {version = ">=0.4.0", markers = "python_version >= \"3.6\""} +google-auth = ">=1.25.0,<3.0dev" +google-auth-oauthlib = ">=0.4.0" setuptools = "*" [[package]] @@ -8820,6 +8821,64 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +[[package]] +name = "shapely" +version = "2.0.6" +description = "Manipulation and analysis of 
geometric objects" +optional = false +python-versions = ">=3.7" +files = [ + {file = "shapely-2.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29a34e068da2d321e926b5073539fd2a1d4429a2c656bd63f0bd4c8f5b236d0b"}, + {file = "shapely-2.0.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c84c3f53144febf6af909d6b581bc05e8785d57e27f35ebaa5c1ab9baba13b"}, + {file = "shapely-2.0.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ad2fae12dca8d2b727fa12b007e46fbc522148a584f5d6546c539f3464dccde"}, + {file = "shapely-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3304883bd82d44be1b27a9d17f1167fda8c7f5a02a897958d86c59ec69b705e"}, + {file = "shapely-2.0.6-cp310-cp310-win32.whl", hash = "sha256:3ec3a0eab496b5e04633a39fa3d5eb5454628228201fb24903d38174ee34565e"}, + {file = "shapely-2.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:28f87cdf5308a514763a5c38de295544cb27429cfa655d50ed8431a4796090c4"}, + {file = "shapely-2.0.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5aeb0f51a9db176da9a30cb2f4329b6fbd1e26d359012bb0ac3d3c7781667a9e"}, + {file = "shapely-2.0.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9a7a78b0d51257a367ee115f4d41ca4d46edbd0dd280f697a8092dd3989867b2"}, + {file = "shapely-2.0.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f32c23d2f43d54029f986479f7c1f6e09c6b3a19353a3833c2ffb226fb63a855"}, + {file = "shapely-2.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3dc9fb0eb56498912025f5eb352b5126f04801ed0e8bdbd867d21bdbfd7cbd0"}, + {file = "shapely-2.0.6-cp311-cp311-win32.whl", hash = "sha256:d93b7e0e71c9f095e09454bf18dad5ea716fb6ced5df3cb044564a00723f339d"}, + {file = "shapely-2.0.6-cp311-cp311-win_amd64.whl", hash = "sha256:c02eb6bf4cfb9fe6568502e85bb2647921ee49171bcd2d4116c7b3109724ef9b"}, + {file = "shapely-2.0.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cec9193519940e9d1b86a3b4f5af9eb6910197d24af02f247afbfb47bcb3fab0"}, + {file = "shapely-2.0.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83b94a44ab04a90e88be69e7ddcc6f332da7c0a0ebb1156e1c4f568bbec983c3"}, + {file = "shapely-2.0.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:537c4b2716d22c92036d00b34aac9d3775e3691f80c7aa517c2c290351f42cd8"}, + {file = "shapely-2.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fea108334be345c283ce74bf064fa00cfdd718048a8af7343c59eb40f59726"}, + {file = "shapely-2.0.6-cp312-cp312-win32.whl", hash = "sha256:42fd4cd4834747e4990227e4cbafb02242c0cffe9ce7ef9971f53ac52d80d55f"}, + {file = "shapely-2.0.6-cp312-cp312-win_amd64.whl", hash = "sha256:665990c84aece05efb68a21b3523a6b2057e84a1afbef426ad287f0796ef8a48"}, + {file = "shapely-2.0.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:42805ef90783ce689a4dde2b6b2f261e2c52609226a0438d882e3ced40bb3013"}, + {file = "shapely-2.0.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6d2cb146191a47bd0cee8ff5f90b47547b82b6345c0d02dd8b25b88b68af62d7"}, + {file = "shapely-2.0.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3fdef0a1794a8fe70dc1f514440aa34426cc0ae98d9a1027fb299d45741c381"}, + {file = "shapely-2.0.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c665a0301c645615a107ff7f52adafa2153beab51daf34587170d85e8ba6805"}, + {file = "shapely-2.0.6-cp313-cp313-win32.whl", hash = "sha256:0334bd51828f68cd54b87d80b3e7cee93f249d82ae55a0faf3ea21c9be7b323a"}, + {file = 
"shapely-2.0.6-cp313-cp313-win_amd64.whl", hash = "sha256:d37d070da9e0e0f0a530a621e17c0b8c3c9d04105655132a87cfff8bd77cc4c2"}, + {file = "shapely-2.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fa7468e4f5b92049c0f36d63c3e309f85f2775752e076378e36c6387245c5462"}, + {file = "shapely-2.0.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed5867e598a9e8ac3291da6cc9baa62ca25706eea186117034e8ec0ea4355653"}, + {file = "shapely-2.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81d9dfe155f371f78c8d895a7b7f323bb241fb148d848a2bf2244f79213123fe"}, + {file = "shapely-2.0.6-cp37-cp37m-win32.whl", hash = "sha256:fbb7bf02a7542dba55129062570211cfb0defa05386409b3e306c39612e7fbcc"}, + {file = "shapely-2.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:837d395fac58aa01aa544495b97940995211e3e25f9aaf87bc3ba5b3a8cd1ac7"}, + {file = "shapely-2.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c6d88ade96bf02f6bfd667ddd3626913098e243e419a0325ebef2bbd481d1eb6"}, + {file = "shapely-2.0.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8b3b818c4407eaa0b4cb376fd2305e20ff6df757bf1356651589eadc14aab41b"}, + {file = "shapely-2.0.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bbc783529a21f2bd50c79cef90761f72d41c45622b3e57acf78d984c50a5d13"}, + {file = "shapely-2.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2423f6c0903ebe5df6d32e0066b3d94029aab18425ad4b07bf98c3972a6e25a1"}, + {file = "shapely-2.0.6-cp38-cp38-win32.whl", hash = "sha256:2de00c3bfa80d6750832bde1d9487e302a6dd21d90cb2f210515cefdb616e5f5"}, + {file = "shapely-2.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:3a82d58a1134d5e975f19268710e53bddd9c473743356c90d97ce04b73e101ee"}, + {file = "shapely-2.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:392f66f458a0a2c706254f473290418236e52aa4c9b476a072539d63a2460595"}, + {file = "shapely-2.0.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eba5bae271d523c938274c61658ebc34de6c4b33fdf43ef7e938b5776388c1be"}, + {file = "shapely-2.0.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7060566bc4888b0c8ed14b5d57df8a0ead5c28f9b69fb6bed4476df31c51b0af"}, + {file = "shapely-2.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b02154b3e9d076a29a8513dffcb80f047a5ea63c897c0cd3d3679f29363cf7e5"}, + {file = "shapely-2.0.6-cp39-cp39-win32.whl", hash = "sha256:44246d30124a4f1a638a7d5419149959532b99dfa25b54393512e6acc9c211ac"}, + {file = "shapely-2.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:2b542d7f1dbb89192d3512c52b679c822ba916f93479fa5d4fc2fe4fa0b3c9e8"}, + {file = "shapely-2.0.6.tar.gz", hash = "sha256:997f6159b1484059ec239cacaa53467fd8b5564dabe186cd84ac2944663b0bf6"}, +] + +[package.dependencies] +numpy = ">=1.14,<3" + +[package.extras] +docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] +test = ["pytest", "pytest-cov"] + [[package]] name = "shellingham" version = "1.5.4" @@ -10544,6 +10603,7 @@ lancedb = ["lancedb", "pyarrow", "tantivy"] motherduck = ["duckdb", "pyarrow"] mssql = ["pyodbc"] parquet = ["pyarrow"] +postgis = ["psycopg2-binary", "psycopg2cffi"] postgres = ["psycopg2-binary", "psycopg2cffi"] qdrant = ["qdrant-client"] redshift = ["psycopg2-binary", "psycopg2cffi"] @@ -10558,4 +10618,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "749c79ead9b1a800cbe5d9c93650e2ede7e9bcb240d07ff2d1787d032a0f2fa6" +content-hash = 
"24e262ce6bb496fad6e587c76bb9ad60a2cc45a00f52e368b59978093e57b77c" diff --git a/pyproject.toml b/pyproject.toml index a1a71a1a6a..638653ffcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,6 +119,7 @@ lancedb = ["lancedb", "pyarrow", "tantivy"] deltalake = ["deltalake", "pyarrow"] sql_database = ["sqlalchemy"] sqlalchemy = ["sqlalchemy", "alembic"] +postgis = ["psycopg2-binary", "psycopg2cffi"] [tool.poetry.scripts] dlt = "dlt.cli._dlt:_main" @@ -168,6 +169,7 @@ types-regex = "^2024.5.15.20240519" flake8-print = "^5.0.0" mimesis = "^7.0.0" ibis-framework = { version = ">=9.0.0", markers = "python_version >= '3.10'", optional = true, extras = ["duckdb", "postgres", "bigquery", "snowflake", "mssql", "clickhouse"]} +shapely = ">=2.0.6" [tool.poetry.group.sources] optional = true diff --git a/tests/load/postgres/test_postgres_table_builder.py b/tests/load/postgres/test_postgres_table_builder.py index 4dac400f2a..e2ed0f0b2e 100644 --- a/tests/load/postgres/test_postgres_table_builder.py +++ b/tests/load/postgres/test_postgres_table_builder.py @@ -1,24 +1,35 @@ -import pytest from copy import deepcopy +from typing import Generator, Any, List + +import pytest import sqlfluff +import dlt from dlt.common.exceptions import TerminalValueError -from dlt.common.utils import uniq_id from dlt.common.schema import Schema, utils - +from dlt.common.typing import DictStrStr +from dlt.common.utils import uniq_id from dlt.destinations import postgres -from dlt.destinations.impl.postgres.postgres import PostgresClient from dlt.destinations.impl.postgres.configuration import ( PostgresClientConfiguration, PostgresCredentials, ) - +from dlt.destinations.impl.postgres.postgres import ( + PostgresClient, +) +from dlt.destinations.impl.postgres.postgres_adapter import ( + postgres_adapter, + SRID_HINT, + GEOMETRY_HINT, +) +from dlt.extract import DltResource from tests.cases import ( TABLE_UPDATE, TABLE_UPDATE_ALL_INT_PRECISIONS, - TABLE_UPDATE_ALL_TIMESTAMP_PRECISIONS, ) -from tests.load.utils import empty_schema +from tests.load.postgres.utils import generate_sample_geometry_records +from tests.load.utils import destinations_configs, DestinationTestConfiguration, sequence_generator +from tests.utils import assert_load_info # mark all tests as essential, do not remove pytestmark = pytest.mark.essential @@ -182,3 +193,217 @@ def test_create_dlt_table(client: PostgresClient) -> None: sqlfluff.parse(sql, dialect="postgres") qualified_name = client.sql_client.make_qualified_table_name("_dlt_version") assert f"CREATE TABLE IF NOT EXISTS {qualified_name}" in sql + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres"]), + ids=lambda x: x.name, +) +def test_adapter_geometry_hint_config( + destination_config: DestinationTestConfiguration, +) -> None: + @dlt.resource(columns=[{"name": "content", "data_type": "text"}]) + def some_data() -> Generator[DictStrStr, Any, None]: + yield from next(sequence_generator()) + + assert some_data.columns["content"] == {"name": "content", "data_type": "text"} # type: ignore[index] + + # Default SRID. + postgres_adapter(some_data, geometry=["content"]) + + assert some_data.columns["content"] == { # type: ignore + "name": "content", + "data_type": "text", + GEOMETRY_HINT: True, + SRID_HINT: 4326, + } + + # Nonstandard SRID. 
+ postgres_adapter(some_data, geometry="content", srid=8232) + + assert some_data.columns["content"] == { # type: ignore + "name": "content", + "data_type": "text", + GEOMETRY_HINT: True, + SRID_HINT: 8232, + } + + +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, subset=["postgres"]), + ids=lambda x: x.name, +) +def test_geometry_types( + destination_config: DestinationTestConfiguration, +) -> None: + from shapely import wkt, wkb, LinearRing, Polygon # type: ignore + + @dlt.resource + def geodata_default_wkt(): + yield from generate_sample_geometry_records("wkt") + + @dlt.resource + def geodata_3857_wkt(): + yield from generate_sample_geometry_records("wkt") + + @dlt.resource + def geodata_2163_wkt(): + yield from generate_sample_geometry_records("wkt") + + @dlt.resource + def geodata_default_wkb_hex(): + yield from generate_sample_geometry_records("wkb_hex") + + @dlt.resource + def geodata_3857_wkb_hex(): + yield from generate_sample_geometry_records("wkb_hex") + + @dlt.resource + def geodata_2163_wkb_hex(): + yield from generate_sample_geometry_records("wkb_hex") + + @dlt.resource(file_format="csv") + def geodata_default_csv_wkt(): + yield from generate_sample_geometry_records("wkt") + + @dlt.resource(file_format="csv") + def geodata_3857_csv_wkt(): + yield from generate_sample_geometry_records("wkt") + + @dlt.resource(file_format="csv") + def geodata_2163_csv_wkt(): + yield from generate_sample_geometry_records("wkt") + + @dlt.resource(file_format="csv") + def geodata_default_csv_wkb_hex(): + yield from generate_sample_geometry_records("wkb_hex") + + @dlt.resource(file_format="csv") + def geodata_3857_csv_wkb_hex(): + yield from generate_sample_geometry_records("wkb_hex") + + @dlt.resource(file_format="csv") + def geodata_2163_csv_wkb_hex(): + yield from generate_sample_geometry_records("wkb_hex") + + @dlt.resource + def no_geodata(): + yield from [{"a": 1}, {"a": 2}] + + postgres_adapter(geodata_default_wkt, geometry=["geom"]) + postgres_adapter(geodata_3857_wkt, geometry=["geom"], srid=3857) + postgres_adapter(geodata_2163_wkt, geometry=["geom"], srid=2163) + postgres_adapter(geodata_default_wkb_hex, geometry=["geom"]) + postgres_adapter(geodata_3857_wkb_hex, geometry=["geom"], srid=3857) + postgres_adapter(geodata_2163_wkb_hex, geometry=["geom"], srid=2163) + postgres_adapter(geodata_default_csv_wkt, geometry=["geom"]) + postgres_adapter(geodata_3857_csv_wkt, geometry=["geom"], srid=3857) + postgres_adapter(geodata_2163_csv_wkt, geometry=["geom"], srid=2163) + postgres_adapter(geodata_default_csv_wkb_hex, geometry=["geom"]) + postgres_adapter(geodata_3857_csv_wkb_hex, geometry=["geom"], srid=3857) + postgres_adapter(geodata_2163_csv_wkb_hex, geometry=["geom"], srid=2163) + + @dlt.source + def geodata() -> List[DltResource]: + return [ + geodata_default_wkt, + geodata_3857_wkt, + geodata_2163_wkt, + geodata_default_wkb_hex, + geodata_3857_wkb_hex, + geodata_2163_wkb_hex, + no_geodata, + geodata_default_csv_wkt, + geodata_3857_csv_wkt, + geodata_2163_csv_wkt, + geodata_default_csv_wkb_hex, + geodata_3857_csv_wkb_hex, + geodata_2163_csv_wkb_hex, + ] + + pipeline = destination_config.setup_pipeline("test_geometry_types", dev_mode=True) + info = pipeline.run( + geodata(), + ) + assert_load_info(info) + + # Assert that types were read in as PostGIS geometry types + with pipeline.sql_client() as c: + with c.execute_query(f"""SELECT f_geometry_column +FROM geometry_columns +WHERE f_table_name in + ('geodata_default_wkb', 'geodata_3857_wkb', 
'geodata_2163_wkb', 'geodata_default_wkt', 'geodata_3857_wkt', + 'geodata_2163_wkt', 'geodata_default_wkb_hex', 'geodata_3857_wkb_hex', 'geodata_2163_wkb_hex', + 'geodata_default_csv_wkt', 'geodata_3857_csv_wkt', 'geodata_2163_csv_wkt', 'geodata_default_csv_wkb_hex', + 'geodata_3857_csv_wkb_hex', 'geodata_2163_csv_wkb_hex' + ) + AND f_table_schema = '{c.fully_qualified_dataset_name(escape=False)}'""") as cur: + records = cur.fetchall() + assert records + assert {record[0] for record in records} == {"geom"} + + # Verify round-trip integrity + for resource in [ + "geodata_default_wkt", + "geodata_3857_wkt", + "geodata_2163_wkt", + "geodata_default_wkb_hex", + "geodata_3857_wkb_hex", + "geodata_2163_wkb_hex", + "geodata_default_csv_wkt", + "geodata_3857_csv_wkt", + "geodata_2163_csv_wkt", + "geodata_default_csv_wkb_hex", + "geodata_3857_csv_wkb_hex", + "geodata_2163_csv_wkb_hex", + ]: + srid = 4326 if resource.startswith("geodata_default") else int(resource.split("_")[1]) + + query = f""" + SELECT type, ST_AsText(geom) as wkt, ST_SRID(geom) as srid, ST_AsBinary(geom) as wkb + FROM {c.make_qualified_table_name(resource)} + """ + + with c.execute_query(query) as cur: + results = cur.fetchall() + + def get_format(column_name): + if column_name.endswith("wkb_hex"): + return "wkb_hex" + return column_name.split("_")[-1] + + original_geometries = generate_sample_geometry_records(get_format(resource)) + + for result in results: + db_type, db_wkt, db_srid, db_wkb = result + orig_geom = next((g for g in original_geometries if g["type"] == db_type), None) + + assert orig_geom is not None, f"No matching original geometry found for {db_type}" + + assert ( + db_srid == srid + ), f"SRID mismatch for {db_type}: expected {srid}, got {db_srid}" + + if "Empty" in db_type: + assert wkt.loads(db_wkt).is_empty, f"Expected empty geometry for {db_type}" + else: + if "_wkt" in db_type: + orig_geom = wkt.loads(orig_geom["geom"]) + db_geom = wkt.loads(db_wkt) + elif "_wkb_hex" in db_type: + orig_geom = wkb.loads(bytes.fromhex(orig_geom["geom"])) + db_geom = wkb.loads(bytes(db_wkb)) + + tolerance = 1e-8 + if isinstance(orig_geom, LinearRing): + # LinearRing geometries are converted to Polygons for PostGIS compatibility. + db_geom = Polygon(orig_geom) + assert LinearRing(db_geom.exterior.coords).equals_exact( + orig_geom, tolerance + ), f"Geometry mismatch for {db_type}" + else: + assert orig_geom.equals_exact( # type: ignore[attr-defined] + db_geom, tolerance + ), f"Geometry mismatch for {db_type}" diff --git a/tests/load/postgres/utils.py b/tests/load/postgres/utils.py new file mode 100644 index 0000000000..b03a6b5096 --- /dev/null +++ b/tests/load/postgres/utils.py @@ -0,0 +1,68 @@ +from typing import List + +from shapely import ( # type: ignore + Point, + LineString, + Polygon, + MultiPoint, + MultiLineString, + MultiPolygon, + GeometryCollection, + LinearRing, +) +from shapely.wkb import dumps as wkb_dumps # type: ignore + +from dlt.common.typing import DictStrStr + + +def generate_sample_geometry_records(geometry_type: str) -> List[DictStrStr]: + """ + Generate sample geometry records including WKT and WKB representations. + + Returns: + A list of dictionaries, each containing a geometry type, + its Well-Known Text (WKT), and Well-Known Binary (WKB) representation. 
+ """ + geometries = [ + ("Point", Point(1, 1)), + ("LineString", LineString([(0, 0), (1, 1), (2, 2)])), + ("Polygon", Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)])), + ("MultiPoint", MultiPoint([(0, 0), (1, 1), (2, 2)])), + ("MultiLineString", MultiLineString([((0, 0), (1, 1)), ((2, 2), (3, 3))])), + ( + "MultiPolygon", + MultiPolygon( + [ + Polygon([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]), + Polygon([(2, 2), (3, 2), (3, 3), (2, 3), (2, 2)]), + ] + ), + ), + ( + "GeometryCollection", + GeometryCollection([Point(1, 1), LineString([(0, 0), (1, 1), (2, 2)])]), + ), + ( + "ComplexPolygon", + Polygon( + [(0, 0), (10, 0), (10, 10), (0, 10), (0, 0)], + [[(4, 4), (6, 4), (6, 6), (4, 6), (4, 4)]], + ), + ), + ("EmptyPoint", Point()), + ("EmptyLineString", LineString()), + ("EmptyPolygon", Polygon()), + ("EmptyMultiPoint", MultiPoint()), + ("EmptyMultiLineString", MultiLineString()), + ("EmptyMultiPolygon", MultiPolygon()), + ("EmptyGeometryCollection", GeometryCollection()), + ] + + # LinearRing only works with wkb types + if geometry_type == "wkb": + geometries += [("LinearRing", LinearRing([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]))] + + return [ + {"type": f"{name}_{geometry_type}", "geom": getattr(geom, geometry_type)} + for name, geom in geometries + ] From 61c2ed96053bd02632b87e2c85fa940a91a9d03b Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Sat, 30 Nov 2024 14:45:29 -0500 Subject: [PATCH 2/4] Incremental table hints and incremental in resource decorator (#2033) * Incremental table hints and incremental in resource decorator * Extract incremental settings to a dict in table schema * Support passing incremental settings to @resource decorator * Fix type errors * Reset incremental from_hints when set in resource decorator * Column hint * Merge multiple hints * Test non column match * adds make_hints test * Accept TIncrementalconfig in make hints * bool only incremental hint * Test jsonpath simple field name --------- Co-authored-by: Marcin Rudolf --- dlt/common/destination/typing.py | 6 +- dlt/common/incremental/__init__.py | 0 dlt/{extract => common}/incremental/typing.py | 12 +- dlt/common/pipeline.py | 3 +- dlt/common/schema/typing.py | 5 +- dlt/common/schema/utils.py | 11 + dlt/common/typing.py | 7 + .../impl/bigquery/bigquery_adapter.py | 6 +- .../impl/lancedb/lancedb_adapter.py | 3 +- .../impl/qdrant/qdrant_adapter.py | 3 +- .../impl/weaviate/weaviate_adapter.py | 3 +- dlt/extract/decorators.py | 19 +- dlt/extract/extract.py | 7 +- dlt/extract/hints.py | 36 ++- dlt/extract/incremental/__init__.py | 74 +++++- dlt/extract/incremental/transform.py | 5 +- dlt/extract/items.py | 12 +- dlt/extract/resource.py | 113 ++++++--- dlt/extract/utils.py | 11 +- dlt/pipeline/pipeline.py | 3 +- dlt/sources/rest_api/typing.py | 7 +- tests/common/test_jsonpath.py | 43 ++++ tests/common/test_validation.py | 4 +- tests/extract/test_extract.py | 42 ++++ tests/extract/test_incremental.py | 229 +++++++++++++++++- 25 files changed, 577 insertions(+), 87 deletions(-) create mode 100644 dlt/common/incremental/__init__.py rename dlt/{extract => common}/incremental/typing.py (66%) create mode 100644 tests/common/test_jsonpath.py diff --git a/dlt/common/destination/typing.py b/dlt/common/destination/typing.py index 8cc08756cd..c79a2b0adc 100644 --- a/dlt/common/destination/typing.py +++ b/dlt/common/destination/typing.py @@ -1,6 +1,10 @@ from typing import Optional -from dlt.common.schema.typing import _TTableSchemaBase, TWriteDisposition, TTableReferenceParam +from dlt.common.schema.typing import ( + 
_TTableSchemaBase, + TWriteDisposition, + TTableReferenceParam, +) class PreparedTableSchema(_TTableSchemaBase, total=False): diff --git a/dlt/common/incremental/__init__.py b/dlt/common/incremental/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dlt/extract/incremental/typing.py b/dlt/common/incremental/typing.py similarity index 66% rename from dlt/extract/incremental/typing.py rename to dlt/common/incremental/typing.py index 7b7786b529..460e2f234b 100644 --- a/dlt/extract/incremental/typing.py +++ b/dlt/common/incremental/typing.py @@ -2,9 +2,7 @@ from typing import Any, Callable, List, Literal, Optional, Sequence, TypeVar, Union -from dlt.common.schema.typing import TColumnNames -from dlt.common.typing import TSortOrder -from dlt.extract.items import TTableHintTemplate +from dlt.common.typing import TSortOrder, TTableHintTemplate, TColumnNames TCursorValue = TypeVar("TCursorValue", bound=Any) LastValueFunc = Callable[[Sequence[TCursorValue]], Any] @@ -19,10 +17,12 @@ class IncrementalColumnState(TypedDict): class IncrementalArgs(TypedDict, total=False): cursor_path: str - initial_value: Optional[str] - last_value_func: Optional[LastValueFunc[str]] + initial_value: Optional[Any] + last_value_func: Optional[Union[LastValueFunc[str], Literal["min", "max"]]] + """Last value callable or name of built in function""" primary_key: Optional[TTableHintTemplate[TColumnNames]] - end_value: Optional[str] + end_value: Optional[Any] row_order: Optional[TSortOrder] allow_external_schedulers: Optional[bool] lag: Optional[Union[float, int]] + on_cursor_value_missing: Optional[OnCursorValueMissing] diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index dba1036f85..9d3d5792ea 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -48,7 +48,6 @@ ) from dlt.common.schema import Schema from dlt.common.schema.typing import ( - TColumnNames, TColumnSchema, TWriteDispositionConfig, TSchemaContract, @@ -56,7 +55,7 @@ from dlt.common.storages.load_package import ParsedLoadJobFileName from dlt.common.storages.load_storage import LoadPackageInfo from dlt.common.time import ensure_pendulum_datetime, precise_time -from dlt.common.typing import DictStrAny, REPattern, StrAny, SupportsHumanize +from dlt.common.typing import DictStrAny, REPattern, StrAny, SupportsHumanize, TColumnNames from dlt.common.jsonpath import delete_matches, TAnyJsonPath from dlt.common.data_writers.writers import TLoaderFileFormat from dlt.common.utils import RowCounts, merge_row_counts diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index ed6c1c6d78..c8f5de03ed 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -19,7 +19,7 @@ from dlt.common.data_types import TDataType from dlt.common.normalizers.typing import TNormalizersConfig -from dlt.common.typing import TSortOrder, TAnyDateTime, TLoaderFileFormat +from dlt.common.typing import TSortOrder, TAnyDateTime, TLoaderFileFormat, TColumnNames try: from pydantic import BaseModel as _PydanticBaseModel @@ -132,8 +132,6 @@ class TColumnPropInfo(NamedTuple): "timestamp", "iso_timestamp", "iso_date", "large_integer", "hexbytes_to_text", "wei_to_double" ] TTypeDetectionFunc = Callable[[Type[Any], Any], Optional[TDataType]] -TColumnNames = Union[str, Sequence[str]] -"""A string representing a column name or a list of""" class TColumnType(TypedDict, total=False): @@ -166,6 +164,7 @@ class TColumnSchema(TColumnSchemaBase, total=False): variant: Optional[bool] hard_delete: Optional[bool] dedup_sort: 
Optional[TSortOrder] + incremental: Optional[bool] TTableSchemaColumns = Dict[str, TColumnSchema] diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py index e2e1f959dc..038abdc4d0 100644 --- a/dlt/common/schema/utils.py +++ b/dlt/common/schema/utils.py @@ -547,6 +547,17 @@ def merge_diff(table: TTableSchema, table_diff: TPartialTableSchema) -> TPartial * table hints are added or replaced from diff * nothing gets deleted """ + + incremental_a_col = get_first_column_name_with_prop( + table, "incremental", include_incomplete=True + ) + if incremental_a_col: + incremental_b_col = get_first_column_name_with_prop( + table_diff, "incremental", include_incomplete=True + ) + if incremental_b_col: + table["columns"][incremental_a_col].pop("incremental") + # add new columns when all checks passed updated_columns = merge_columns(table["columns"], table_diff["columns"]) table.update(table_diff) diff --git a/dlt/common/typing.py b/dlt/common/typing.py index 94edb57194..a3364d1b07 100644 --- a/dlt/common/typing.py +++ b/dlt/common/typing.py @@ -29,6 +29,7 @@ Iterator, Generator, NamedTuple, + Sequence, ) from typing_extensions import ( @@ -112,6 +113,8 @@ class SecretSentinel: TSecretStrValue = Annotated[str, SecretSentinel] +TColumnNames = Union[str, Sequence[str]] +"""A string representing a column name or a list of""" TDataItem: TypeAlias = Any """A single data item as extracted from data source""" TDataItems: TypeAlias = Union[TDataItem, List[TDataItem]] @@ -126,6 +129,10 @@ class SecretSentinel: TLoaderFileFormat = Literal["jsonl", "typed-jsonl", "insert_values", "parquet", "csv", "reference"] """known loader file formats""" +TDynHintType = TypeVar("TDynHintType") +TFunHintTemplate = Callable[[TDataItem], TDynHintType] +TTableHintTemplate = Union[TDynHintType, TFunHintTemplate[TDynHintType]] + class ConfigValueSentinel(NamedTuple): """Class to create singleton sentinel for config and secret injected value""" diff --git a/dlt/destinations/impl/bigquery/bigquery_adapter.py b/dlt/destinations/impl/bigquery/bigquery_adapter.py index 5f6a1fab85..05b26530d9 100644 --- a/dlt/destinations/impl/bigquery/bigquery_adapter.py +++ b/dlt/destinations/impl/bigquery/bigquery_adapter.py @@ -4,10 +4,8 @@ from dlt.common.destination import PreparedTableSchema from dlt.common.pendulum import timezone -from dlt.common.schema.typing import ( - TColumnNames, - TTableSchemaColumns, -) +from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.typing import TColumnNames from dlt.destinations.utils import get_resource_for_adapter from dlt.extract import DltResource from dlt.extract.items import TTableHintTemplate diff --git a/dlt/destinations/impl/lancedb/lancedb_adapter.py b/dlt/destinations/impl/lancedb/lancedb_adapter.py index 4314dd703f..d192168d0a 100644 --- a/dlt/destinations/impl/lancedb/lancedb_adapter.py +++ b/dlt/destinations/impl/lancedb/lancedb_adapter.py @@ -1,6 +1,7 @@ from typing import Any, Dict -from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns +from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.typing import TColumnNames from dlt.destinations.utils import get_resource_for_adapter from dlt.extract import DltResource from dlt.extract.items import TTableHintTemplate diff --git a/dlt/destinations/impl/qdrant/qdrant_adapter.py b/dlt/destinations/impl/qdrant/qdrant_adapter.py index bbc2d719a8..5a5a44965c 100644 --- a/dlt/destinations/impl/qdrant/qdrant_adapter.py +++ b/dlt/destinations/impl/qdrant/qdrant_adapter.py @@ -1,6 +1,7 @@ from 
typing import Any -from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns +from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.typing import TColumnNames from dlt.extract import DltResource from dlt.destinations.utils import get_resource_for_adapter diff --git a/dlt/destinations/impl/weaviate/weaviate_adapter.py b/dlt/destinations/impl/weaviate/weaviate_adapter.py index 0ca9047528..329d13c493 100644 --- a/dlt/destinations/impl/weaviate/weaviate_adapter.py +++ b/dlt/destinations/impl/weaviate/weaviate_adapter.py @@ -1,6 +1,7 @@ from typing import Dict, Any, Literal, Set, get_args -from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns +from dlt.common.schema.typing import TTableSchemaColumns +from dlt.common.typing import TColumnNames from dlt.extract import DltResource, resource as make_resource from dlt.destinations.utils import get_resource_for_adapter diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index 63140e8f78..f8703e1452 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -32,7 +32,6 @@ from dlt.common.schema.utils import DEFAULT_WRITE_DISPOSITION from dlt.common.schema.schema import Schema from dlt.common.schema.typing import ( - TColumnNames, TFileFormat, TWriteDisposition, TWriteDispositionConfig, @@ -43,7 +42,8 @@ ) from dlt.common.storages.exceptions import SchemaNotFoundError from dlt.common.storages.schema_storage import SchemaStorage -from dlt.common.typing import AnyFun, ParamSpec, Concatenate, TDataItem, TDataItems +from dlt.common.typing import AnyFun, ParamSpec, Concatenate, TDataItem, TDataItems, TColumnNames + from dlt.common.utils import get_callable_name, get_module_name, is_inner_callable from dlt.extract.hints import make_hints @@ -70,6 +70,7 @@ TSourceFunParams, ) from dlt.extract.resource import DltResource, TUnboundDltResource, TDltResourceImpl +from dlt.extract.incremental import TIncrementalConfig @configspec @@ -446,6 +447,7 @@ def resource( selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, + incremental: Optional[TIncrementalConfig] = None, _impl_cls: Type[TDltResourceImpl] = DltResource, # type: ignore[assignment] ) -> TDltResourceImpl: ... @@ -468,6 +470,7 @@ def resource( selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, + incremental: Optional[TIncrementalConfig] = None, _impl_cls: Type[TDltResourceImpl] = DltResource, # type: ignore[assignment] ) -> Callable[[Callable[TResourceFunParams, Any]], TDltResourceImpl]: ... @@ -490,6 +493,7 @@ def resource( selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, + incremental: Optional[TIncrementalConfig] = None, _impl_cls: Type[TDltResourceImpl] = DltResource, # type: ignore[assignment] standalone: Literal[True] = True, ) -> Callable[ @@ -515,6 +519,7 @@ def resource( selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, + incremental: Optional[TIncrementalConfig] = None, _impl_cls: Type[TDltResourceImpl] = DltResource, # type: ignore[assignment] ) -> TDltResourceImpl: ... 
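Context for the hunks above: they add an `incremental` argument to the `@dlt.resource` overloads, typed as `TIncrementalConfig` (either an `Incremental[Any]` instance or a plain `IncrementalArgs` dict). A minimal usage sketch, assuming a dlt build that includes this patch and mirroring only the behavior exercised by the tests added later in this series (resource and field names below are illustrative):

    import dlt

    # cursor declared on the decorator; the resource body needs no incremental argument
    @dlt.resource(incremental=dlt.sources.incremental("updated_at", initial_value=100))
    def events():
        yield [{"updated_at": i} for i in range(90, 110)]

    # equivalent dict form, mirroring the IncrementalArgs keys
    @dlt.resource(incremental={"cursor_path": "updated_at", "initial_value": 100})
    def events_dict():
        yield [{"updated_at": i} for i in range(90, 110)]

    # with the default last_value_func (max), rows below initial_value are filtered out on iteration
    print(list(events()))

Both spellings resolve to the same hint: the dict form is converted with `Incremental.ensure_instance()` inside `make_hints()`, as shown in the hints.py hunks below.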
@@ -536,6 +541,7 @@ def resource( selected: bool = True, spec: Type[BaseConfiguration] = None, parallelized: bool = False, + incremental: Optional[TIncrementalConfig] = None, _impl_cls: Type[TDltResourceImpl] = DltResource, # type: ignore[assignment] standalone: bool = False, data_from: TUnboundDltResource = None, @@ -632,6 +638,7 @@ def make_resource(_name: str, _section: str, _data: Any) -> TDltResourceImpl: table_format=table_format, file_format=file_format, references=references, + incremental=incremental, ) resource = _impl_cls.from_data( @@ -643,6 +650,10 @@ def make_resource(_name: str, _section: str, _data: Any) -> TDltResourceImpl: cast(DltResource, data_from), True, ) + + if incremental: + # Reset the flag to allow overriding by incremental argument + resource.incremental._from_hints = False # If custom nesting level was specified then # we need to add it to table hints so that # later in normalizer dlt/common/normalizers/json/relational.py @@ -681,7 +692,7 @@ def _wrap(*args: Any, **kwargs: Any) -> TDltResourceImpl: return _wrap def decorator( - f: Callable[TResourceFunParams, Any] + f: Callable[TResourceFunParams, Any], ) -> Callable[TResourceFunParams, TDltResourceImpl]: if not callable(f): if data_from: @@ -1023,7 +1034,7 @@ def get_source() -> DltSource: def defer( - f: Callable[TDeferredFunParams, TBoundItems] + f: Callable[TDeferredFunParams, TBoundItems], ) -> Callable[TDeferredFunParams, TDeferred[TBoundItems]]: @wraps(f) def _wrap(*args: Any, **kwargs: Any) -> TDeferred[TBoundItems]: diff --git a/dlt/extract/extract.py b/dlt/extract/extract.py index e65f6cf0d0..25c3a0dbae 100644 --- a/dlt/extract/extract.py +++ b/dlt/extract/extract.py @@ -2,7 +2,7 @@ from collections.abc import Sequence as C_Sequence from copy import copy import itertools -from typing import Iterator, List, Dict, Any, Optional +from typing import Iterator, List, Dict, Any, Optional, Mapping import yaml from dlt.common.configuration.container import Container @@ -17,13 +17,12 @@ WithStepInfo, reset_resource_state, ) -from dlt.common.typing import DictStrAny +from dlt.common.typing import DictStrAny, TColumnNames from dlt.common.runtime import signals from dlt.common.runtime.collector import Collector, NULL_COLLECTOR from dlt.common.schema import Schema, utils from dlt.common.schema.typing import ( TAnySchemaColumns, - TColumnNames, TSchemaContract, TTableFormat, TWriteDispositionConfig, @@ -39,7 +38,7 @@ from dlt.extract.decorators import SourceInjectableContext, SourceSchemaInjectableContext from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints -from dlt.extract.incremental import IncrementalResourceWrapper +from dlt.extract.incremental import IncrementalResourceWrapper, Incremental from dlt.extract.pipe_iterator import PipeIterator from dlt.extract.source import DltSource from dlt.extract.resource import DltResource diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index 5daabd0c6a..000e5c4cdb 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -1,10 +1,9 @@ -from typing import TypedDict, cast, Any, Optional, Dict, Sequence, Mapping +from typing import TypedDict, cast, Any, Optional, Dict, Sequence, Mapping, Union from typing_extensions import Self from dlt.common import logger from dlt.common.schema.typing import ( C_DLT_ID, - TColumnNames, TColumnProp, TFileFormat, TPartialTableSchema, @@ -28,7 +27,7 @@ new_column, new_table, ) -from dlt.common.typing import TDataItem +from dlt.common.typing import TDataItem, TColumnNames from dlt.common.time import 
ensure_pendulum_datetime from dlt.common.utils import clone_dict_nested from dlt.common.normalizers.json.relational import DataItemNormalizer @@ -37,7 +36,7 @@ DataItemRequiredForDynamicTableHints, InconsistentTableTemplate, ) -from dlt.extract.incremental import Incremental +from dlt.extract.incremental import Incremental, TIncrementalConfig from dlt.extract.items import TFunHintTemplate, TTableHintTemplate, TableNameMeta, ValidateItem from dlt.extract.utils import ensure_table_schema_columns, ensure_table_schema_columns_hint from dlt.extract.validation import create_item_validator @@ -86,6 +85,7 @@ def make_hints( table_format: TTableHintTemplate[TTableFormat] = None, file_format: TTableHintTemplate[TFileFormat] = None, references: TTableHintTemplate[TTableReferenceParam] = None, + incremental: TIncrementalConfig = None, ) -> TResourceHints: """A convenience function to create resource hints. Accepts both static and dynamic hints based on data. @@ -119,6 +119,8 @@ def make_hints( if validator: new_template["validator"] = validator DltResourceHints.validate_dynamic_hints(new_template) + if incremental is not None: # TODO: Validate + new_template["incremental"] = Incremental.ensure_instance(incremental) return new_template @@ -204,6 +206,10 @@ def compute_table_schema(self, item: TDataItem = None, meta: Any = None) -> TTab for k, v in table_template.items() if k not in NATURAL_CALLABLES } # type: ignore + if "incremental" in table_template: + incremental = table_template["incremental"] + if isinstance(incremental, Incremental) and incremental is not Incremental.EMPTY: + resolved_template["incremental"] = incremental table_schema = self._create_table_schema(resolved_template, self.name) migrate_complex_types(table_schema, warn=True) validate_dict_ignoring_xkeys( @@ -221,7 +227,7 @@ def apply_hints( columns: TTableHintTemplate[TAnySchemaColumns] = None, primary_key: TTableHintTemplate[TColumnNames] = None, merge_key: TTableHintTemplate[TColumnNames] = None, - incremental: Incremental[Any] = None, + incremental: TIncrementalConfig = None, schema_contract: TTableHintTemplate[TSchemaContract] = None, additional_table_hints: Optional[Dict[str, TTableHintTemplate[Any]]] = None, table_format: TTableHintTemplate[TTableFormat] = None, @@ -360,7 +366,7 @@ def apply_hints( # set properties that can't be passed to make_hints if incremental is not None: - t["incremental"] = incremental + t["incremental"] = Incremental.ensure_instance(incremental) self._set_hints(t, create_table_variant) return self @@ -506,6 +512,22 @@ def _merge_merge_disposition_dict(dict_: Dict[str, Any]) -> None: "row_key": False, } + @staticmethod + def _merge_incremental_column_hint(dict_: Dict[str, Any]) -> None: + incremental = dict_.pop("incremental") + if incremental is None: + return + col_name = incremental.get_cursor_column_name() + if not col_name: + # cursor cannot resolve to a single column, no hint added + return + incremental_col = dict_["columns"].get(col_name) + if not incremental_col: + incremental_col = {"name": col_name} + + incremental_col["incremental"] = True + dict_["columns"][col_name] = incremental_col + @staticmethod def _create_table_schema(resource_hints: TResourceHints, resource_name: str) -> TTableSchema: """Creates table schema from resource hints and resource name. 
Resource hints are resolved @@ -518,6 +540,8 @@ def _create_table_schema(resource_hints: TResourceHints, resource_name: str) -> "disposition": resource_hints["write_disposition"] } # wrap in dict DltResourceHints._merge_write_disposition_dict(resource_hints) # type: ignore[arg-type] + if "incremental" in resource_hints: + DltResourceHints._merge_incremental_column_hint(resource_hints) # type: ignore[arg-type] dict_ = cast(TTableSchema, resource_hints) dict_["resource"] = resource_name return dict_ diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index 69af0d68a6..28d33bb71f 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -1,6 +1,6 @@ import os from datetime import datetime # noqa: I251 -from typing import Generic, ClassVar, Any, Optional, Type, Dict, Union +from typing import Generic, ClassVar, Any, Optional, Type, Dict, Union, Literal, Tuple from typing_extensions import get_args import inspect @@ -9,7 +9,7 @@ from dlt.common import logger from dlt.common.exceptions import MissingDependencyException from dlt.common.pendulum import pendulum -from dlt.common.jsonpath import compile_path +from dlt.common.jsonpath import compile_path, extract_simple_field_name from dlt.common.typing import ( TDataItem, TDataItems, @@ -19,8 +19,8 @@ get_generic_type_argument_from_instance, is_optional_type, is_subclass, + TColumnNames, ) -from dlt.common.schema.typing import TColumnNames from dlt.common.configuration import configspec, ConfigurationValueError from dlt.common.configuration.specs import BaseConfiguration from dlt.common.pipeline import resource_state @@ -29,17 +29,19 @@ coerce_value, py_type_to_sc_type, ) +from dlt.common.utils import without_none from dlt.extract.exceptions import IncrementalUnboundError from dlt.extract.incremental.exceptions import ( IncrementalCursorPathMissing, IncrementalPrimaryKeyMissing, ) -from dlt.extract.incremental.typing import ( +from dlt.common.incremental.typing import ( IncrementalColumnState, TCursorValue, LastValueFunc, OnCursorValueMissing, + IncrementalArgs, ) from dlt.extract.items import SupportsPipe, TTableHintTemplate, ItemTransform from dlt.extract.incremental.transform import ( @@ -123,7 +125,7 @@ def __init__( self, cursor_path: str = None, initial_value: Optional[TCursorValue] = None, - last_value_func: Optional[LastValueFunc[TCursorValue]] = max, + last_value_func: Optional[Union[LastValueFunc[TCursorValue], Literal["min", "max"]]] = max, primary_key: Optional[TTableHintTemplate[TColumnNames]] = None, end_value: Optional[TCursorValue] = None, row_order: Optional[TSortOrder] = None, @@ -135,6 +137,16 @@ def __init__( if cursor_path: compile_path(cursor_path) self.cursor_path = cursor_path + if isinstance(last_value_func, str): + if last_value_func == "min": + last_value_func = min + elif last_value_func == "max": + last_value_func = max + else: + raise ValueError( + f"Unknown last_value_func '{last_value_func}' passed as string. Provide a" + " callable to use a custom function." 
+ ) self.last_value_func = last_value_func self.initial_value = initial_value """Initial value of last_value""" @@ -247,6 +259,10 @@ def copy(self) -> "Incremental[TCursorValue]": # merge creates a copy return self.merge(self) + def get_cursor_column_name(self) -> Optional[str]: + """Return the name of the cursor column if the cursor path resolves to a single column""" + return extract_simple_field_name(self.cursor_path) + def on_resolved(self) -> None: compile_path(self.cursor_path) if self.end_value is not None and self.initial_value is None: @@ -491,6 +507,12 @@ def can_close(self) -> bool: and self.start_out_of_range ) + @classmethod + def ensure_instance(cls, value: "TIncrementalConfig") -> "Incremental[TCursorValue]": + if isinstance(value, Incremental): + return value + return cls(**value) + def __str__(self) -> str: return ( f"Incremental at 0x{id(self):x} for resource {self.resource_name} with cursor path:" @@ -511,7 +533,6 @@ def _get_transformer(self, items: TDataItems) -> IncrementalTransform: def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]: if rows is None: return rows - transformer = self._get_transformer(rows) if isinstance(rows, list): rows = [ @@ -556,6 +577,8 @@ def _check_duplicate_cursor_threshold( Incremental.EMPTY = Incremental[Any]() Incremental.EMPTY.__is_resolved__ = True +TIncrementalConfig = Union[Incremental[Any], IncrementalArgs] + class IncrementalResourceWrapper(ItemTransform[TDataItem]): placement_affinity: ClassVar[float] = 1 # stick to end @@ -595,6 +618,34 @@ def get_incremental_arg(sig: inspect.Signature) -> Optional[inspect.Parameter]: break return incremental_param + @staticmethod + def inject_implicit_incremental_arg( + incremental: Optional[Union[Incremental[Any], "IncrementalResourceWrapper"]], + sig: inspect.Signature, + func_args: Tuple[Any], + func_kwargs: Dict[str, Any], + fallback: Optional[Incremental[Any]] = None, + ) -> Tuple[Tuple[Any], Dict[str, Any], Optional[Incremental[Any]]]: + """Inject the incremental instance into function arguments + if the function has an incremental argument without default in its signature and it is not already set in the arguments. + + Returns: + Tuple of the new args, kwargs and the incremental instance that was injected (if any) + """ + if isinstance(incremental, IncrementalResourceWrapper): + incremental = incremental.incremental + if not incremental: + if not fallback: + return func_args, func_kwargs, None + incremental = fallback + incremental_param = IncrementalResourceWrapper.get_incremental_arg(sig) + if incremental_param: + bound_args = sig.bind_partial(*func_args, **func_kwargs) + if not bound_args.arguments.get(incremental_param.name): + bound_args.arguments[incremental_param.name] = incremental + return bound_args.args, bound_args.kwargs, incremental + return func_args, func_kwargs, None + def wrap(self, sig: inspect.Signature, func: TFun) -> TFun: """Wrap the callable to inject an `Incremental` object configured for the resource.""" incremental_param = self.get_incremental_arg(sig) @@ -666,12 +717,14 @@ def incremental(self) -> Optional[Incremental[Any]]: return self._incremental def set_incremental( - self, incremental: Optional[Incremental[Any]], from_hints: bool = False + self, incremental: Optional[TIncrementalConfig], from_hints: bool = False ) -> None: """Sets the incremental. 
If incremental was set from_hints, it can only be changed in the same manner""" if self._from_hints and not from_hints: # do not accept incremental if apply hints were used return + if incremental is not None: + incremental = Incremental.ensure_instance(incremental) self._from_hints = from_hints self._incremental = incremental @@ -710,6 +763,12 @@ def __call__(self, item: TDataItems, meta: Any = None) -> Optional[TDataItems]: return self._incremental(item, meta) +def incremental_config_to_instance(cfg: TIncrementalConfig) -> Incremental[Any]: + if isinstance(cfg, Incremental): + return cfg + return Incremental(**cfg) + + __all__ = [ "Incremental", "IncrementalResourceWrapper", @@ -717,6 +776,7 @@ def __call__(self, item: TDataItems, meta: Any = None) -> Optional[TDataItems]: "IncrementalCursorPathMissing", "IncrementalPrimaryKeyMissing", "IncrementalUnboundError", + "TIncrementalConfig", "LastValueFunc", "TCursorValue", ] diff --git a/dlt/extract/incremental/transform.py b/dlt/extract/incremental/transform.py index 842c8aebe8..22b1194b51 100644 --- a/dlt/extract/incremental/transform.py +++ b/dlt/extract/incremental/transform.py @@ -5,7 +5,7 @@ from dlt.common.utils import digest128 from dlt.common.json import json from dlt.common.pendulum import pendulum -from dlt.common.typing import TDataItem +from dlt.common.typing import TDataItem, TColumnNames from dlt.common.jsonpath import find_values, compile_path, extract_simple_field_name from dlt.extract.incremental.exceptions import ( IncrementalCursorInvalidCoercion, @@ -13,10 +13,9 @@ IncrementalPrimaryKeyMissing, IncrementalCursorPathHasValueNone, ) -from dlt.extract.incremental.typing import TCursorValue, LastValueFunc, OnCursorValueMissing +from dlt.common.incremental.typing import TCursorValue, LastValueFunc, OnCursorValueMissing from dlt.extract.utils import resolve_column_value from dlt.extract.items import TTableHintTemplate -from dlt.common.schema.typing import TColumnNames try: from dlt.common.libs import pyarrow diff --git a/dlt/extract/items.py b/dlt/extract/items.py index d721e8094e..888787e6b7 100644 --- a/dlt/extract/items.py +++ b/dlt/extract/items.py @@ -19,7 +19,14 @@ ) from concurrent.futures import Future -from dlt.common.typing import TAny, TDataItem, TDataItems +from dlt.common.typing import ( + TAny, + TDataItem, + TDataItems, + TTableHintTemplate, + TFunHintTemplate, + TDynHintType, +) TDecompositionStrategy = Literal["none", "scc"] @@ -27,9 +34,6 @@ TAwaitableDataItems = Awaitable[TDataItems] TPipedDataItems = Union[TDataItems, TDeferredDataItems, TAwaitableDataItems] -TDynHintType = TypeVar("TDynHintType") -TFunHintTemplate = Callable[[TDataItem], TDynHintType] -TTableHintTemplate = Union[TDynHintType, TFunHintTemplate[TDynHintType]] if TYPE_CHECKING: TItemFuture = Future[TPipedDataItems] diff --git a/dlt/extract/resource.py b/dlt/extract/resource.py index c6ca1660f4..42e3905162 100644 --- a/dlt/extract/resource.py +++ b/dlt/extract/resource.py @@ -11,6 +11,7 @@ Union, Any, Optional, + Mapping, ) from typing_extensions import TypeVar, Self @@ -28,6 +29,7 @@ pipeline_state, ) from dlt.common.utils import flatten_list_or_items, get_callable_name, uniq_id +from dlt.common.schema.typing import TTableSchema from dlt.extract.utils import wrap_async_iterator, wrap_parallel_iterator from dlt.extract.items import ( @@ -42,7 +44,7 @@ ) from dlt.extract.pipe_iterator import ManagedPipeIterator from dlt.extract.pipe import Pipe, TPipeStep -from dlt.extract.hints import DltResourceHints, HintsMeta, TResourceHints +from
dlt.extract.hints import DltResourceHints, HintsMeta, TResourceHints, make_hints from dlt.extract.incremental import Incremental, IncrementalResourceWrapper from dlt.extract.exceptions import ( InvalidTransformerDataTypeGeneratorFunctionRequired, @@ -442,35 +444,60 @@ def add_step( self._pipe.insert_step(item_transform, insert_at) return self + def _remove_incremental_step(self) -> None: + step_no = self._pipe.find(Incremental, IncrementalResourceWrapper) + if step_no >= 0: + self._pipe.remove_step(step_no) + + def set_incremental( + self, + new_incremental: Union[Incremental[Any], IncrementalResourceWrapper], + from_hints: bool = False, + ) -> Optional[Union[Incremental[Any], IncrementalResourceWrapper]]: + """Set/replace the incremental transform for the resource. + + Args: + new_incremental: The Incremental instance/hint to set or replace + from_hints: If the incremental is set from hints. Defaults to False. + """ + if new_incremental is Incremental.EMPTY: + new_incremental = None + incremental = self.incremental + if incremental is not None: + # if isinstance(new_incremental, Mapping): + # new_incremental = Incremental.ensure_instance(new_incremental) + + if isinstance(new_incremental, IncrementalResourceWrapper): + # Completely replace the wrapper + self._remove_incremental_step() + self.add_step(new_incremental) + elif isinstance(incremental, IncrementalResourceWrapper): + incremental.set_incremental(new_incremental, from_hints=from_hints) + else: + self._remove_incremental_step() + # re-add the step + incremental = None + if incremental is None: + # if there's no wrapper add incremental as a transform + if new_incremental: + if not isinstance(new_incremental, IncrementalResourceWrapper): + new_incremental = Incremental.ensure_instance(new_incremental) + self.add_step(new_incremental) + return new_incremental + def _set_hints( self, table_schema_template: TResourceHints, create_table_variant: bool = False ) -> None: super()._set_hints(table_schema_template, create_table_variant) # validators and incremental apply only to resource hints if not create_table_variant: - incremental = self.incremental # try to late assign incremental if table_schema_template.get("incremental") is not None: - new_incremental = table_schema_template["incremental"] - # remove incremental if empty - if new_incremental is Incremental.EMPTY: - new_incremental = None - - if incremental is not None: - if isinstance(incremental, IncrementalResourceWrapper): - # replace in wrapper - incremental.set_incremental(new_incremental, from_hints=True) - else: - step_no = self._pipe.find(Incremental) - self._pipe.remove_step(step_no) - # re-add the step - incremental = None - - if incremental is None: - # if there's no wrapper add incremental as a transform - incremental = new_incremental # type: ignore - if new_incremental: - self.add_step(new_incremental) + incremental = self.set_incremental( + table_schema_template["incremental"], from_hints=True + ) + else: + incremental = self.incremental if incremental: primary_key = table_schema_template.get("primary_key", incremental.primary_key) @@ -480,10 +507,25 @@ def _set_hints( if table_schema_template.get("validator") is not None: self.validator = table_schema_template["validator"] + def compute_table_schema(self, item: TDataItem = None, meta: Any = None) -> TTableSchema: + incremental: Optional[Union[Incremental[Any], IncrementalResourceWrapper]] = ( + self.incremental + ) + if incremental and "incremental" not in self._hints: + if isinstance(incremental, 
IncrementalResourceWrapper): + incremental = incremental.incremental + if incremental: + self._hints["incremental"] = incremental + + table_schema = super().compute_table_schema(item, meta) + + return table_schema + def bind(self: TDltResourceImpl, *args: Any, **kwargs: Any) -> TDltResourceImpl: """Binds the parametrized resource to passed arguments. Modifies resource pipe in place. Does not evaluate generators or iterators.""" if self._args_bound: raise TypeError(f"Parametrized resource {self.name} is not callable") + orig_gen = self._pipe.gen gen = self._pipe.bind_gen(*args, **kwargs) if isinstance(gen, DltResource): @@ -599,14 +641,14 @@ def _eject_config(self) -> bool: if not self._pipe.is_empty and not self._args_bound: orig_gen = getattr(self._pipe.gen, "__GEN__", None) if orig_gen: - step_no = self._pipe.find(IncrementalResourceWrapper) - if step_no >= 0: - self._pipe.remove_step(step_no) + self._remove_incremental_step() self._pipe.replace_gen(orig_gen) return True return False - def _inject_config(self) -> "DltResource": + def _inject_config( + self, incremental_from_hints_override: Optional[bool] = None + ) -> "DltResource": """Wraps the pipe generation step in incremental and config injection wrappers and adds pipe step with Incremental transform. """ @@ -618,8 +660,17 @@ def _inject_config(self) -> "DltResource": sig = inspect.signature(gen) if IncrementalResourceWrapper.should_wrap(sig): incremental = IncrementalResourceWrapper(self._hints.get("primary_key")) + if incr_hint := self._hints.get("incremental"): + incremental.set_incremental( + incr_hint, + from_hints=( + incremental_from_hints_override + if incremental_from_hints_override is not None + else True + ), + ) incr_f = incremental.wrap(sig, gen) - self.add_step(incremental) + self.set_incremental(incremental) else: incr_f = gen resource_sections = (known_sections.SOURCES, self.section, self.name) @@ -649,6 +700,12 @@ def _clone( if self._pipe and not self._pipe.is_empty: pipe = pipe._clone(new_name=new_name, with_parent=with_parent) # incremental and parent are already in the pipe (if any) + + incremental = self.incremental + if isinstance(incremental, IncrementalResourceWrapper): + incremental_from_hints: Optional[bool] = incremental._from_hints + else: + incremental_from_hints = None r_ = self.__class__( pipe, self._clone_hints(self._hints), @@ -661,7 +718,7 @@ def _clone( # this makes sure that a take config values from a right section and wrapper has a separated # instance in the pipeline if r_._eject_config(): - r_._inject_config() + r_._inject_config(incremental_from_hints_override=incremental_from_hints) return r_ def _get_config_section_context(self) -> ConfigSectionContext: diff --git a/dlt/extract/utils.py b/dlt/extract/utils.py index 55a8b0b8c4..68570d0995 100644 --- a/dlt/extract/utils.py +++ b/dlt/extract/utils.py @@ -22,8 +22,15 @@ from dlt.common.data_writers import TDataItemFormat from dlt.common.exceptions import MissingDependencyException from dlt.common.pipeline import reset_resource_state -from dlt.common.schema.typing import TColumnNames, TAnySchemaColumns, TTableSchemaColumns -from dlt.common.typing import AnyFun, DictStrAny, TDataItem, TDataItems, TAnyFunOrGenerator +from dlt.common.schema.typing import TAnySchemaColumns, TTableSchemaColumns +from dlt.common.typing import ( + AnyFun, + DictStrAny, + TDataItem, + TDataItems, + TAnyFunOrGenerator, + TColumnNames, +) from dlt.common.utils import get_callable_name from dlt.extract.exceptions import ( diff --git a/dlt/pipeline/pipeline.py 
b/dlt/pipeline/pipeline.py index a9f07d417e..70d160ea67 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -38,7 +38,6 @@ from dlt.common.exceptions import MissingDependencyException from dlt.common.runtime import signals, apply_runtime_config from dlt.common.schema.typing import ( - TColumnNames, TSchemaTables, TTableFormat, TWriteDispositionConfig, @@ -47,7 +46,7 @@ ) from dlt.common.schema.utils import normalize_schema_name from dlt.common.storages.exceptions import LoadPackageNotFound -from dlt.common.typing import ConfigValue, TFun, TSecretStrValue, is_optional_type +from dlt.common.typing import ConfigValue, TFun, TSecretStrValue, is_optional_type, TColumnNames from dlt.common.runners import pool_runner as runner from dlt.common.storages import ( LiveSchemaStorage, diff --git a/dlt/sources/rest_api/typing.py b/dlt/sources/rest_api/typing.py index ccef828b1a..c48e54de4a 100644 --- a/dlt/sources/rest_api/typing.py +++ b/dlt/sources/rest_api/typing.py @@ -15,7 +15,7 @@ from dlt.common.schema.typing import ( TAnySchemaColumns, ) -from dlt.extract.incremental.typing import IncrementalArgs +from dlt.common.incremental.typing import IncrementalArgs from dlt.extract.items import TTableHintTemplate from dlt.extract.hints import TResourceHintsBase from dlt.sources.helpers.rest_client.auth import AuthConfigBase, TApiKeyLocation @@ -23,9 +23,8 @@ from dataclasses import dataclass, field from dlt.common import jsonpath -from dlt.common.typing import TSortOrder +from dlt.common.typing import TSortOrder, TColumnNames from dlt.common.schema.typing import ( - TColumnNames, TTableFormat, TAnySchemaColumns, TWriteDispositionConfig, @@ -33,7 +32,7 @@ ) from dlt.extract.items import TTableHintTemplate -from dlt.extract.incremental.typing import LastValueFunc +from dlt.common.incremental.typing import LastValueFunc from dlt.extract.resource import DltResource from requests import Session diff --git a/tests/common/test_jsonpath.py b/tests/common/test_jsonpath.py new file mode 100644 index 0000000000..c4e9fbc664 --- /dev/null +++ b/tests/common/test_jsonpath.py @@ -0,0 +1,43 @@ +import pytest + +from dlt.common import jsonpath as jp + + +@pytest.mark.parametrize("compiled", [True, False]) +@pytest.mark.parametrize( + "path, expected", + [ + ("col_a", "col_a"), + ("'col.a'", "col.a"), + ("'$col_a'", "$col_a"), + ("'col|a'", "col|a"), + ], +) +def test_extract_simple_field_name_positive(path, expected, compiled): + if compiled: + path = jp.compile_path(path) + + result = jp.extract_simple_field_name(path) + assert result == expected + + +@pytest.mark.parametrize("compiled", [True, False]) +@pytest.mark.parametrize( + "path", + [ + "$.col_a", + "$.col_a.items", + "$.col_a.items[0]", + "$.col_a.items[*]", + "col_a|col_b", + ], +) +def test_extract_simple_field_name_negative(path, compiled): + if compiled: + path = jp.compile_path(path) + + result = jp.extract_simple_field_name(path) + assert result is None + + +# TODO: Test all jsonpath utils diff --git a/tests/common/test_validation.py b/tests/common/test_validation.py index 3f8ccfc20f..f3ebb02b46 100644 --- a/tests/common/test_validation.py +++ b/tests/common/test_validation.py @@ -19,13 +19,13 @@ from dlt.common import Decimal, jsonpath from dlt.common.exceptions import DictValidationException from dlt.common.schema.typing import ( - TColumnNames, TStoredSchema, TColumnSchema, TWriteDispositionConfig, ) from dlt.common.schema.utils import simple_regex_validator -from dlt.common.typing import DictStrStr, StrStr, TDataItem, TSortOrder 
+from dlt.common.typing import DictStrStr, StrStr, TDataItem, TSortOrder, TColumnNames + from dlt.common.validation import validate_dict, validate_dict_ignoring_xkeys diff --git a/tests/extract/test_extract.py b/tests/extract/test_extract.py index dbec417f97..9343449aed 100644 --- a/tests/extract/test_extract.py +++ b/tests/extract/test_extract.py @@ -213,6 +213,48 @@ def with_table_hints(): extract_step.extract(source, 20, 1) +def test_extract_hints_mark_incremental(extract_step: Extract) -> None: + os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "TRUE" + + @dlt.resource(columns=[{"name": "id", "data_type": "bigint"}], primary_key="id") + def with_table_hints(): + # yield a regular dataset first, simulate backfil + yield [{"id": id_, "pk": "A"} for id_ in range(1, 10)] + + # get the resource + resource = dlt.current.source().resources[dlt.current.resource_name()] + table = resource.compute_table_schema() + # also there we see the hints + assert table["columns"]["id"]["primary_key"] is True + assert table["columns"]["id"]["data_type"] == "bigint" + + # start emitting incremental + yield dlt.mark.with_hints( + [{"id": id_, "pk": "A", "created_at": id_ + 10} for id_ in range(100, 110)], + make_hints(incremental=dlt.sources.incremental("created_at", initial_value=105)), + ) + + # get the resource + resource = dlt.current.source().resources[dlt.current.resource_name()] + assert resource.incremental.cursor_path == "created_at" # type: ignore[attr-defined] + assert resource.incremental.primary_key == "id" + # we are able to add the incremental to the pipe. but it won't + # join actually executing pipe which is a clone of a (partial) pipe of the resource + assert isinstance(resource._pipe._steps[1], dlt.sources.incremental) + # NOTE: this results in unbounded exception + # assert resource.incremental.last_value == 299 + table = resource.compute_table_schema() + assert table["columns"]["created_at"]["incremental"] is not None + + yield [{"id": id_, "pk": "A", "created_at": id_ + 10} for id_ in range(110, 120)] + + source = DltSource(dlt.Schema("hintable"), "module", [with_table_hints]) + extract_step.extract(source, 20, 1) + # make sure incremental is in the source schema + table = source.schema.get_table("with_table_hints") + assert table["columns"]["created_at"]["incremental"] is not None + + def test_extract_metrics_on_exception_no_flush(extract_step: Extract) -> None: @dlt.resource def letters(): diff --git a/tests/extract/test_incremental.py b/tests/extract/test_incremental.py index 7ce4228b6c..30df12ae17 100644 --- a/tests/extract/test_incremental.py +++ b/tests/extract/test_incremental.py @@ -5,7 +5,7 @@ from datetime import datetime, date # noqa: I251 from itertools import chain, count from time import sleep -from typing import Any, Optional +from typing import Any, Optional, Literal, Sequence, Dict from unittest import mock import duckdb @@ -1468,10 +1468,13 @@ def test_apply_hints_incremental(item_type: TestDataItemFormat) -> None: data = [{"created_at": 1}, {"created_at": 2}, {"created_at": 3}] source_items = data_to_item_format(item_type, data) + should_have_arg = True + @dlt.resource def some_data(created_at: Optional[dlt.sources.incremental[int]] = None): # make sure that incremental from apply_hints is here - if created_at is not None: + if should_have_arg: + assert created_at is not None assert created_at.cursor_path == "created_at" assert created_at.last_value_func is max yield source_items @@ -1505,6 +1508,7 @@ def some_data(created_at: Optional[dlt.sources.incremental[int]] = 
None): assert list(r) == [] # remove incremental + should_have_arg = False r.apply_hints(incremental=dlt.sources.incremental.EMPTY) assert r.incremental is not None assert r.incremental.incremental is None @@ -1515,6 +1519,7 @@ def some_data(created_at: Optional[dlt.sources.incremental[int]] = None): # as above but we provide explicit incremental when creating resource p = p.drop() + should_have_arg = True r = some_data(created_at=dlt.sources.incremental("created_at", last_value_func=min)) # hints have precedence, as expected r.apply_hints(incremental=dlt.sources.incremental("created_at", last_value_func=max)) @@ -3568,3 +3573,223 @@ def some_data( call for call in logger_spy.call_args_list if "Large number of records" in call.args[0] ] assert len(warning_calls) == 1 + + +def _resource_for_table_hint( + hint_type: Literal[ + "default_arg", "explicit_arg", "apply_hints", "default_arg_override", "decorator" + ], + data: Sequence[Dict[str, Any]], + incremental_arg: dlt.sources.incremental[Any], + incremental_arg_default: dlt.sources.incremental[Any] = None, +) -> DltResource: + if incremental_arg is None and incremental_arg_default is None: + raise ValueError("One of the incremental arguments must be provided.") + + decorator_arg = None + if hint_type == "default_arg": + default_arg = incremental_arg_default + override_arg = None + elif hint_type == "default_arg_override": + default_arg = incremental_arg_default + override_arg = incremental_arg + elif hint_type == "decorator": + default_arg = None + override_arg = None + decorator_arg = incremental_arg_default + else: + default_arg = None + override_arg = incremental_arg + + @dlt.resource(incremental=decorator_arg) + def some_data( + updated_at: dlt.sources.incremental[Any] = default_arg, + ) -> Any: + yield data_to_item_format("object", data) + + if override_arg is None: + return some_data() + + if hint_type == "apply_hints": + rs = some_data() + rs.apply_hints(incremental=override_arg) + return rs + + return some_data(updated_at=override_arg) + + +@pytest.mark.parametrize( + "hint_type", ["default_arg", "explicit_arg", "apply_hints", "default_arg_override", "decorator"] +) +@pytest.mark.parametrize( + "incremental_settings", + [ + { + "last_value_func": "min", + "row_order": "desc", + "on_cursor_value_missing": "include", + }, + {"last_value_func": "max", "on_cursor_value_missing": "raise"}, + ], +) +def test_incremental_table_hint_datetime_column( + hint_type: Literal[ + "default_arg", + "explicit_arg", + "default_arg_override", + "apply_hints", + "decorator", + ], + incremental_settings: Dict[str, Any], +) -> None: + initial_value_override = pendulum.now() + initial_value_default = pendulum.now().subtract(seconds=10) + rs = _resource_for_table_hint( + hint_type, + [{"updated_at": pendulum.now().add(seconds=i)} for i in range(1, 12)], + dlt.sources.incremental( + "updated_at", initial_value=initial_value_override, **incremental_settings + ), + dlt.sources.incremental( + "updated_at", initial_value=initial_value_default, **incremental_settings + ), + ) + + pipeline = dlt.pipeline(pipeline_name=uniq_id()) + pipeline.extract(rs) + + table_schema = pipeline.default_schema.tables["some_data"] + + assert table_schema["columns"]["updated_at"]["incremental"] is True + + +def incremental_instance_or_dict(use_dict: bool, **kwargs): + if use_dict: + return kwargs + return dlt.sources.incremental(**kwargs) + + +@pytest.mark.parametrize("use_dict", [False, True]) +def test_incremental_in_resource_decorator(use_dict: bool) -> None: + # Incremental set 
in decorator, without any arguments + @dlt.resource( + incremental=incremental_instance_or_dict( + use_dict, cursor_path="value", initial_value=5, last_value_func=min + ) + ) + def no_incremental_arg(): + yield [{"value": i} for i in range(10)] + + result = list(no_incremental_arg()) + # filtering is applied + assert result == [{"value": i} for i in range(0, 6)] + + # Apply hints overrides the decorator settings + rs = no_incremental_arg() + rs.apply_hints( + incremental=incremental_instance_or_dict( + use_dict, cursor_path="value", initial_value=3, last_value_func=max + ) + ) + result = list(rs) + assert result == [{"value": i} for i in range(3, 10)] + + @dlt.resource( + incremental=incremental_instance_or_dict( + use_dict, cursor_path="value", initial_value=5, last_value_func=min + ) + ) + def with_optional_incremental_arg(incremental: Optional[dlt.sources.incremental[int]] = None): + assert incremental is not None + yield [{"value": i} for i in range(10)] + + # Decorator settings are used + result = list(with_optional_incremental_arg()) + assert result == [{"value": i} for i in range(0, 6)] + + +@pytest.mark.parametrize("use_dict", [False, True]) +def test_incremental_in_resource_decorator_default_arg(use_dict: bool) -> None: + @dlt.resource( + incremental=incremental_instance_or_dict( + use_dict, cursor_path="value", initial_value=5, last_value_func=min + ) + ) + def with_default_incremental_arg( + incremental: dlt.sources.incremental[int] = dlt.sources.incremental( + "value", initial_value=3, last_value_func=min + ) + ): + assert incremental.last_value == initial_value + assert incremental.last_value_func == last_value_func + yield [{"value": i} for i in range(10)] + + last_value_func = max + initial_value = 4 + # Explicit argument overrides the default and decorator argument + result = list( + with_default_incremental_arg( + incremental=dlt.sources.incremental( + "value", initial_value=initial_value, last_value_func=last_value_func + ) + ) + ) + assert result == [{"value": i} for i in range(4, 10)] + + # Decorator param overrides function default arg + last_value_func = min + initial_value = 5 + result = list(with_default_incremental_arg()) + assert result == [{"value": i} for i in range(0, 6)] + + +@pytest.mark.parametrize("use_dict", [False, True]) +def test_incremental_table_hint_merged_columns(use_dict: bool) -> None: + @dlt.resource( + incremental=incremental_instance_or_dict( + use_dict, cursor_path="col_a", initial_value=3, last_value_func=min + ) + ) + def some_data(): + yield [{"col_a": i, "foo": i + 2, "col_b": i + 1, "bar": i + 3} for i in range(10)] + + pipeline = dlt.pipeline(pipeline_name=uniq_id()) + pipeline.extract(some_data()) + + table_schema = pipeline.default_schema.tables["some_data"] + assert table_schema["columns"]["col_a"]["incremental"] is True + + rs = some_data() + rs.apply_hints( + incremental=incremental_instance_or_dict( + use_dict, cursor_path="col_b", initial_value=5, last_value_func=max + ) + ) + + pipeline.extract(rs) + + table_schema_2 = pipeline.default_schema.tables["some_data"] + + # Only one column should have the hint + assert "incremental" not in table_schema_2["columns"]["col_a"] + assert table_schema_2["columns"]["col_b"]["incremental"] is True + + +@pytest.mark.parametrize("use_dict", [True, False]) +def test_incremental_column_hint_cursor_is_not_column(use_dict: bool): + @dlt.resource( + incremental=incremental_instance_or_dict( + use_dict, cursor_path="col_a|col_b", initial_value=3, last_value_func=min + ) + ) + def some_data(): + yield 
[{"col_a": i, "foo": i + 2, "col_b": i + 1, "bar": i + 3} for i in range(10)] + + pipeline = dlt.pipeline(pipeline_name=uniq_id()) + + pipeline.extract(some_data()) + + table_schema = pipeline.default_schema.tables["some_data"] + + for col in table_schema["columns"].values(): + assert "incremental" not in col From f4faa836df37cf810b2eb5b8ba754aa80f946719 Mon Sep 17 00:00:00 2001 From: rudolfix Date: Mon, 2 Dec 2024 16:24:57 +0100 Subject: [PATCH 3/4] #2087 allows double underscores in identifiers (#2098) * removes astunparse and aiohttp * allows for built-in ast unparse if present * uses break path for normalization to allow names containing path separators, migrates old schema to enable compat mode with old behavior * adds removeprefix util * updates docs * bumps dlt to version 1.4.1 * linter fixes * fixes tests * fixes and tests saving pandas indexes * fixes sqllite read interface tests * updates docs --- dlt/cli/deploy_command_helpers.py | 13 +- dlt/cli/source_detection.py | 5 +- dlt/common/destination/reference.py | 1 - dlt/common/libs/pandas.py | 5 +- dlt/common/normalizers/json/helpers.py | 141 + dlt/common/normalizers/json/relational.py | 172 +- dlt/common/normalizers/naming/naming.py | 2 + dlt/common/normalizers/typing.py | 2 + dlt/common/reflection/utils.py | 14 +- dlt/common/schema/configuration.py | 2 + dlt/common/schema/migrations.py | 7 +- dlt/common/schema/normalizers.py | 7 +- dlt/common/schema/schema.py | 26 +- dlt/common/schema/typing.py | 2 +- dlt/common/utils.py | 5 + dlt/destinations/dataset.py | 6 +- .../impl/clickhouse/sql_client.py | 6 +- .../impl/filesystem/filesystem.py | 3 +- dlt/extract/extractors.py | 14 +- dlt/normalize/worker.py | 5 +- dlt/reflection/script_visitor.py | 9 +- dlt/sources/sql_database/arrow_helpers.py | 5 +- .../dlt-ecosystem/destinations/filesystem.md | 2 +- .../verified-sources/arrow-pandas.md | 2 + .../docs/general-usage/naming-convention.md | 39 + mypy.ini | 2 +- poetry.lock | 110 +- pyproject.toml | 5 +- .../cases/schemas/eth/ethereum_schema_v11.yml | 394 +++ .../cases/schemas/github/issues.schema.json | 2404 ++++++++--------- .../normalizers/test_json_relational.py | 10 +- .../normalizers/test_naming_snake_case.py | 8 + .../common/schema/test_import_normalizers.py | 36 +- .../schema/test_normalize_identifiers.py | 62 +- tests/common/schema/test_schema.py | 20 +- tests/common/schema/test_versioning.py | 12 +- tests/common/storages/test_schema_storage.py | 12 +- tests/common/storages/utils.py | 4 +- tests/common/test_utils.py | 9 + tests/common/test_validation.py | 2 +- tests/common/utils.py | 6 +- .../cases/eth_source/ethereum.schema.yaml | 4 +- tests/extract/test_decorators.py | 4 +- tests/extract/test_incremental.py | 76 +- tests/libs/pyarrow/test_pyarrow_normalizer.py | 4 +- .../test_clickhouse_configuration.py | 26 +- tests/load/conftest.py | 2 +- tests/load/duckdb/test_duckdb_client.py | 2 +- tests/load/filesystem/test_aws_credentials.py | 1 - .../load/filesystem/test_filesystem_common.py | 1 - tests/load/pipeline/conftest.py | 2 +- tests/load/pipeline/test_merge_disposition.py | 2 +- tests/load/pipeline/test_scd2.py | 3 +- tests/load/qdrant/utils.py | 1 - tests/load/redshift/test_redshift_client.py | 2 +- tests/load/test_job_client.py | 2 +- tests/load/test_read_interfaces.py | 11 +- tests/load/test_sql_client.py | 2 +- tests/load/weaviate/utils.py | 1 - .../cases/github_pipeline/github_rev.py | 26 + tests/pipeline/test_dlt_versions.py | 56 + .../test_max_nesting.py | 0 tests/pipeline/test_pipeline.py | 105 + 63 files changed, 2203 
insertions(+), 1721 deletions(-) create mode 100644 dlt/common/normalizers/json/helpers.py create mode 100644 tests/common/cases/schemas/eth/ethereum_schema_v11.yml create mode 100644 tests/pipeline/cases/github_pipeline/github_rev.py rename tests/{normalize => pipeline}/test_max_nesting.py (100%) diff --git a/dlt/cli/deploy_command_helpers.py b/dlt/cli/deploy_command_helpers.py index b508b32226..e3719fbe38 100644 --- a/dlt/cli/deploy_command_helpers.py +++ b/dlt/cli/deploy_command_helpers.py @@ -5,7 +5,6 @@ from yaml import Dumper from itertools import chain from typing import List, Optional, Sequence, Tuple, Any, Dict -from astunparse import unparse # optional dependencies import pipdeptree @@ -23,7 +22,7 @@ from dlt.common.git import get_origin, get_repo, Repo from dlt.common.configuration.specs.runtime_configuration import get_default_pipeline_name from dlt.common.typing import StrAny -from dlt.common.reflection.utils import evaluate_node_literal +from dlt.common.reflection.utils import evaluate_node_literal, ast_unparse from dlt.common.pipeline import LoadInfo, TPipelineState, get_dlt_repos_dir from dlt.common.storages import FileStorage from dlt.common.utils import set_working_dir @@ -313,7 +312,7 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio if f_r_value is None: fmt.warning( "The value of `dev_mode` in call to `dlt.pipeline` cannot be" - f" determined from {unparse(f_r_node).strip()}. We assume that you know" + f" determined from {ast_unparse(f_r_node).strip()}. We assume that you know" " what you are doing :)" ) if f_r_value is True: @@ -331,8 +330,8 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio raise CliCommandInnerException( "deploy", "The value of 'pipelines_dir' argument in call to `dlt_pipeline` cannot be" - f" determined from {unparse(p_d_node).strip()}. Pipeline working dir will" - " be found. Pass it directly with --pipelines-dir option.", + f" determined from {ast_unparse(p_d_node).strip()}. Pipeline working dir" + " will be found. Pass it directly with --pipelines-dir option.", ) p_n_node = call_args.arguments.get("pipeline_name") @@ -342,8 +341,8 @@ def parse_pipeline_info(visitor: PipelineScriptVisitor) -> List[Tuple[str, Optio raise CliCommandInnerException( "deploy", "The value of 'pipeline_name' argument in call to `dlt_pipeline` cannot be" - f" determined from {unparse(p_d_node).strip()}. Pipeline working dir will" - " be found. Pass it directly with --pipeline-name option.", + f" determined from {ast_unparse(p_d_node).strip()}. Pipeline working dir" + " will be found. 
Pass it directly with --pipeline-name option.", ) pipelines.append((pipeline_name, pipelines_dir)) diff --git a/dlt/cli/source_detection.py b/dlt/cli/source_detection.py index f4e9b3e050..7067f8b896 100644 --- a/dlt/cli/source_detection.py +++ b/dlt/cli/source_detection.py @@ -1,11 +1,10 @@ import ast import inspect -from astunparse import unparse from typing import Dict, Tuple, Set, List from dlt.common.configuration import is_secret_hint from dlt.common.configuration.specs import BaseConfiguration -from dlt.common.reflection.utils import creates_func_def_name_node +from dlt.common.reflection.utils import creates_func_def_name_node, ast_unparse from dlt.common.typing import is_optional_type from dlt.sources import SourceReference @@ -65,7 +64,7 @@ def find_source_calls_to_replace( for calls in visitor.known_sources_resources_calls.values(): for call in calls: transformed_nodes.append( - (call.func, ast.Name(id=pipeline_name + "_" + unparse(call.func))) + (call.func, ast.Name(id=pipeline_name + "_" + ast_unparse(call.func))) ) return transformed_nodes diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index d1024eb28c..e27f99cde7 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -81,7 +81,6 @@ DataFrame = Any ArrowTable = Any IbisBackend = Any - else: DataFrame = Any ArrowTable = Any diff --git a/dlt/common/libs/pandas.py b/dlt/common/libs/pandas.py index a165ea8747..35cfe623bb 100644 --- a/dlt/common/libs/pandas.py +++ b/dlt/common/libs/pandas.py @@ -8,8 +8,9 @@ raise MissingDependencyException("dlt Pandas Helpers", ["pandas"]) -def pandas_to_arrow(df: pandas.DataFrame) -> Any: +def pandas_to_arrow(df: pandas.DataFrame, preserve_index: bool = False) -> Any: """Converts pandas to arrow or raises an exception if pyarrow is not installed""" from dlt.common.libs.pyarrow import pyarrow as pa - return pa.Table.from_pandas(df) + # NOTE: None preserves named indexes but ignores unnamed + return pa.Table.from_pandas(df, preserve_index=preserve_index) diff --git a/dlt/common/normalizers/json/helpers.py b/dlt/common/normalizers/json/helpers.py new file mode 100644 index 0000000000..96c9ab4954 --- /dev/null +++ b/dlt/common/normalizers/json/helpers.py @@ -0,0 +1,141 @@ +""" +Cached helper methods for all operations that are called often +""" +from functools import lru_cache +from typing import Any, Dict, List, Optional, Tuple, cast + +from dlt.common.json import json +from dlt.common.destination.utils import resolve_merge_strategy +from dlt.common.normalizers.naming import NamingConvention +from dlt.common.normalizers.typing import TRowIdType +from dlt.common.normalizers.utils import DLT_ID_LENGTH_BYTES +from dlt.common.schema import Schema +from dlt.common.schema.typing import TColumnSchema, C_DLT_ID, DLT_NAME_PREFIX +from dlt.common.schema.utils import ( + get_columns_names_with_prop, + get_first_column_name_with_prop, + is_nested_table, +) +from dlt.common.utils import digest128 + + +@lru_cache(maxsize=None) +def shorten_fragments(naming: NamingConvention, *idents: str) -> str: + return naming.shorten_fragments(*idents) + + +@lru_cache(maxsize=None) +def normalize_table_identifier(schema: Schema, naming: NamingConvention, table_name: str) -> str: + if schema._normalizers_config.get("use_break_path_on_normalize", True): + return naming.normalize_tables_path(table_name) + else: + return naming.normalize_table_identifier(table_name) + + +@lru_cache(maxsize=None) +def normalize_identifier(schema: Schema, naming: 
NamingConvention, identifier: str) -> str: + if schema._normalizers_config.get("use_break_path_on_normalize", True): + return naming.normalize_path(identifier) + else: + return naming.normalize_identifier(identifier) + + +@lru_cache(maxsize=None) +def get_table_nesting_level( + schema: Schema, table_name: str, default_nesting: int = 1000 +) -> Optional[int]: + """gets table nesting level, will inherit from parent if not set""" + + table = schema.tables.get(table_name) + if ( + table + and (max_nesting := cast(int, table.get("x-normalizer", {}).get("max_nesting"))) is not None + ): + return max_nesting + return default_nesting + + +@lru_cache(maxsize=None) +def get_primary_key(schema: Schema, table_name: str) -> List[str]: + if table_name not in schema.tables: + return [] + table = schema.get_table(table_name) + return get_columns_names_with_prop(table, "primary_key", include_incomplete=True) + + +@lru_cache(maxsize=None) +def is_nested_type( + schema: Schema, + table_name: str, + field_name: str, + _r_lvl: int, +) -> bool: + """For those paths the nested objects should be left in place. + Cache perf: max_nesting < _r_lvl: ~2x faster, full check 10x faster + """ + + # nesting level is counted backwards + # is we have traversed to or beyond the calculated nesting level, we detect a nested type + if _r_lvl <= 0: + return True + + column: TColumnSchema = None + table = schema.tables.get(table_name) + if table: + column = table["columns"].get(field_name) + if column is None or "data_type" not in column: + data_type = schema.get_preferred_type(field_name) + else: + data_type = column["data_type"] + + return data_type == "json" + + +@lru_cache(maxsize=None) +def get_nested_row_id_type(schema: Schema, table_name: str) -> Tuple[TRowIdType, bool]: + """Gets type of row id to be added to nested table and if linking information should be added""" + if table := schema.tables.get(table_name): + merge_strategy = resolve_merge_strategy(schema.tables, table) + if merge_strategy not in ("upsert", "scd2") and not is_nested_table(table): + return "random", False + else: + # table will be created, use standard linking + pass + return "row_hash", True + + +@lru_cache(maxsize=None) +def get_root_row_id_type(schema: Schema, table_name: str) -> TRowIdType: + if table := schema.tables.get(table_name): + merge_strategy = resolve_merge_strategy(schema.tables, table) + if merge_strategy == "upsert": + return "key_hash" + elif merge_strategy == "scd2": + x_row_version_col = get_first_column_name_with_prop( + schema.get_table(table_name), + "x-row-version", + include_incomplete=True, + ) + if x_row_version_col == schema.naming.normalize_identifier(C_DLT_ID): + return "row_hash" + return "random" + + +def get_row_hash(row: Dict[str, Any], subset: Optional[List[str]] = None) -> str: + """Returns hash of row. + + Hash includes column names and values and is ordered by column name. + Excludes dlt system columns. + Can be used as deterministic row identifier. 
+ """ + row_filtered = {k: v for k, v in row.items() if not k.startswith(DLT_NAME_PREFIX)} + if subset is not None: + row_filtered = {k: v for k, v in row.items() if k in subset} + row_str = json.dumps(row_filtered, sort_keys=True) + return digest128(row_str, DLT_ID_LENGTH_BYTES) + + +def get_nested_row_hash(parent_row_id: str, nested_table: str, list_idx: int) -> str: + # create deterministic unique id of the nested row taking into account that all lists are ordered + # and all nested tables must be lists + return digest128(f"{parent_row_id}_{nested_table}_{list_idx}", DLT_ID_LENGTH_BYTES) diff --git a/dlt/common/normalizers/json/relational.py b/dlt/common/normalizers/json/relational.py index c5338192a0..e365017125 100644 --- a/dlt/common/normalizers/json/relational.py +++ b/dlt/common/normalizers/json/relational.py @@ -1,34 +1,27 @@ -from functools import lru_cache from typing import Dict, List, Mapping, Optional, Sequence, Tuple, cast, TypedDict, Any -from dlt.common.destination.utils import resolve_merge_strategy -from dlt.common.json import json -from dlt.common.normalizers.exceptions import InvalidJsonNormalizer -from dlt.common.normalizers.typing import TJSONNormalizer, TRowIdType -from dlt.common.normalizers.utils import generate_dlt_id, DLT_ID_LENGTH_BYTES +from dlt.common.normalizers.exceptions import InvalidJsonNormalizer +from dlt.common.normalizers.typing import TJSONNormalizer +from dlt.common.normalizers.utils import generate_dlt_id from dlt.common.typing import DictStrAny, TDataItem, StrAny from dlt.common.schema import Schema from dlt.common.schema.typing import ( C_DLT_ID, C_DLT_LOAD_ID, - TColumnSchema, TColumnName, TSimpleRegex, - DLT_NAME_PREFIX, ) from dlt.common.schema.utils import ( column_name_validator, - get_columns_names_with_prop, - get_first_column_name_with_prop, - has_column_with_prop, is_nested_table, ) -from dlt.common.utils import digest128, update_dict_nested +from dlt.common.utils import update_dict_nested from dlt.common.normalizers.json import ( TNormalizedRowIterator, wrap_in_dict, DataItemNormalizer as DataItemNormalizerBase, ) +from dlt.common.normalizers.json import helpers from dlt.common.validation import validate_dict @@ -103,18 +96,18 @@ def _flatten( def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] = ()) -> None: for k, v in dict_row.items(): if k.strip(): - norm_k = self._normalize_identifier(self.schema, k) + norm_k = helpers.normalize_identifier(self.schema, self.naming, k) else: # for empty keys in the data use _ norm_k = self.EMPTY_KEY_IDENTIFIER # if norm_k != k: # print(f"{k} -> {norm_k}") nested_name = ( - norm_k if path == () else self._shorten_fragments(self.schema, *path, norm_k) + norm_k if path == () else helpers.shorten_fragments(self.naming, *path, norm_k) ) # for lists and dicts we must check if type is possibly nested if isinstance(v, (dict, list)): - if not self._is_nested_type(self.schema, table, nested_name, __r_lvl): + if not helpers.is_nested_type(self.schema, table, nested_name, __r_lvl): # TODO: if schema contains table {table}__{nested_name} then convert v into single element list if isinstance(v, dict): # flatten the dict more @@ -122,7 +115,8 @@ def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] 
= ()) - else: # pass the list to out_rec_list out_rec_list[ - path + (self._normalize_table_identifier(self.schema, k),) + path + + (helpers.normalize_table_identifier(self.schema, self.naming, k),) ] = v continue else: @@ -134,26 +128,6 @@ def norm_row_dicts(dict_row: StrAny, __r_lvl: int, path: Tuple[str, ...] = ()) - norm_row_dicts(dict_row, _r_lvl) return out_rec_row, out_rec_list - @staticmethod - def get_row_hash(row: Dict[str, Any], subset: Optional[List[str]] = None) -> str: - """Returns hash of row. - - Hash includes column names and values and is ordered by column name. - Excludes dlt system columns. - Can be used as deterministic row identifier. - """ - row_filtered = {k: v for k, v in row.items() if not k.startswith(DLT_NAME_PREFIX)} - if subset is not None: - row_filtered = {k: v for k, v in row.items() if k in subset} - row_str = json.dumps(row_filtered, sort_keys=True) - return digest128(row_str, DLT_ID_LENGTH_BYTES) - - @staticmethod - def _get_nested_row_hash(parent_row_id: str, nested_table: str, list_idx: int) -> str: - # create deterministic unique id of the nested row taking into account that all lists are ordered - # and all nested tables must be lists - return digest128(f"{parent_row_id}_{nested_table}_{list_idx}", DLT_ID_LENGTH_BYTES) - def _link_row(self, row: DictStrAny, parent_row_id: str, list_idx: int) -> DictStrAny: assert parent_row_id row[self.c_dlt_parent_id] = parent_row_id @@ -175,20 +149,20 @@ def _add_row_id( is_root: bool = False, ) -> str: if is_root: # root table - row_id_type = self._get_root_row_id_type(self.schema, table) + row_id_type = helpers.get_root_row_id_type(self.schema, table) if row_id_type in ("key_hash", "row_hash"): subset = None if row_id_type == "key_hash": - subset = self._get_primary_key(self.schema, table) + subset = helpers.get_primary_key(self.schema, table) # base hash on `dict_row` instead of `flattened_row` # so changes in nested tables lead to new row id - row_id = self.get_row_hash(dict_row, subset=subset) + row_id = helpers.get_row_hash(dict_row, subset=subset) else: row_id = generate_dlt_id() else: # nested table - row_id_type, is_nested = self._get_nested_row_id_type(self.schema, table) + row_id_type, is_nested = helpers.get_nested_row_id_type(self.schema, table) if row_id_type == "row_hash": - row_id = DataItemNormalizer._get_nested_row_hash(parent_row_id, table, pos) + row_id = helpers.get_nested_row_hash(parent_row_id, table, pos) # link to parent table if is_nested: self._link_row(flattened_row, parent_row_id, pos) @@ -227,7 +201,7 @@ def _normalize_list( parent_row_id: Optional[str] = None, _r_lvl: int = 0, ) -> TNormalizedRowIterator: - table = self._shorten_fragments(self.schema, *parent_path, *ident_path) + table = helpers.shorten_fragments(self.naming, *parent_path, *ident_path) for idx, v in enumerate(seq): if isinstance(v, dict): @@ -251,7 +225,7 @@ def _normalize_list( wrap_v = wrap_in_dict(self.c_value, v) DataItemNormalizer._extend_row(extend, wrap_v) self._add_row_id(table, wrap_v, wrap_v, parent_row_id, idx) - yield (table, self._shorten_fragments(self.schema, *parent_path)), wrap_v + yield (table, helpers.shorten_fragments(self.naming, *parent_path)), wrap_v def _normalize_row( self, @@ -264,8 +238,8 @@ def _normalize_row( _r_lvl: int = 0, is_root: bool = False, ) -> TNormalizedRowIterator: - schema = self.schema - table = self._shorten_fragments(schema, *parent_path, *ident_path) + naming = self.naming + table = helpers.shorten_fragments(naming, *parent_path, *ident_path) # flatten current row and 
extract all lists to recur into flattened_row, lists = self._flatten(table, dict_row, _r_lvl) # always extend row @@ -280,7 +254,7 @@ def _normalize_row( # yield parent table first should_descend = yield ( - (table, self._shorten_fragments(schema, *parent_path)), + (table, helpers.shorten_fragments(naming, *parent_path)), flattened_row, ) if should_descend is False: @@ -361,8 +335,10 @@ def normalize_data_item( # identify load id if loaded data must be processed after loading incrementally row[self.c_dlt_load_id] = load_id # get table name and nesting level - root_table_name = self._normalize_table_identifier(self.schema, table_name) - max_nesting = self._get_table_nesting_level(self.schema, root_table_name, self.max_nesting) + root_table_name = helpers.normalize_table_identifier(self.schema, self.naming, table_name) + max_nesting = helpers.get_table_nesting_level( + self.schema, root_table_name, self.max_nesting + ) yield from self._normalize_row( row, @@ -426,103 +402,3 @@ def _normalize_prop( "./normalizers/json/config", validator_f=column_name_validator(schema.naming), ) - - # - # Cached helper methods for all operations that are called often - # - @staticmethod - @lru_cache(maxsize=None) - def _shorten_fragments(schema: Schema, *idents: str) -> str: - return schema.naming.shorten_fragments(*idents) - - @staticmethod - @lru_cache(maxsize=None) - def _normalize_table_identifier(schema: Schema, table_name: str) -> str: - return schema.naming.normalize_table_identifier(table_name) - - @staticmethod - @lru_cache(maxsize=None) - def _normalize_identifier(schema: Schema, identifier: str) -> str: - return schema.naming.normalize_path(identifier) - - @staticmethod - @lru_cache(maxsize=None) - def _get_table_nesting_level( - schema: Schema, table_name: str, default_nesting: int = 1000 - ) -> Optional[int]: - """gets table nesting level, will inherit from parent if not set""" - - table = schema.tables.get(table_name) - if ( - table - and (max_nesting := cast(int, table.get("x-normalizer", {}).get("max_nesting"))) - is not None - ): - return max_nesting - return default_nesting - - @staticmethod - @lru_cache(maxsize=None) - def _get_primary_key(schema: Schema, table_name: str) -> List[str]: - if table_name not in schema.tables: - return [] - table = schema.get_table(table_name) - return get_columns_names_with_prop(table, "primary_key", include_incomplete=True) - - @staticmethod - @lru_cache(maxsize=None) - def _is_nested_type( - schema: Schema, - table_name: str, - field_name: str, - _r_lvl: int, - ) -> bool: - """For those paths the nested objects should be left in place. 
- Cache perf: max_nesting < _r_lvl: ~2x faster, full check 10x faster - """ - - # nesting level is counted backwards - # is we have traversed to or beyond the calculated nesting level, we detect a nested type - if _r_lvl <= 0: - return True - - column: TColumnSchema = None - table = schema.tables.get(table_name) - if table: - column = table["columns"].get(field_name) - if column is None or "data_type" not in column: - data_type = schema.get_preferred_type(field_name) - else: - data_type = column["data_type"] - - return data_type == "json" - - @staticmethod - @lru_cache(maxsize=None) - def _get_nested_row_id_type(schema: Schema, table_name: str) -> Tuple[TRowIdType, bool]: - """Gets type of row id to be added to nested table and if linking information should be added""" - if table := schema.tables.get(table_name): - merge_strategy = resolve_merge_strategy(schema.tables, table) - if merge_strategy not in ("upsert", "scd2") and not is_nested_table(table): - return "random", False - else: - # table will be created, use standard linking - pass - return "row_hash", True - - @staticmethod - @lru_cache(maxsize=None) - def _get_root_row_id_type(schema: Schema, table_name: str) -> TRowIdType: - if table := schema.tables.get(table_name): - merge_strategy = resolve_merge_strategy(schema.tables, table) - if merge_strategy == "upsert": - return "key_hash" - elif merge_strategy == "scd2": - x_row_version_col = get_first_column_name_with_prop( - schema.get_table(table_name), - "x-row-version", - include_incomplete=True, - ) - if x_row_version_col == schema.naming.normalize_identifier(C_DLT_ID): - return "row_hash" - return "random" diff --git a/dlt/common/normalizers/naming/naming.py b/dlt/common/normalizers/naming/naming.py index 5ae5847963..9953d25913 100644 --- a/dlt/common/normalizers/naming/naming.py +++ b/dlt/common/normalizers/naming/naming.py @@ -45,6 +45,8 @@ def make_path(self, *identifiers: str) -> str: def break_path(self, path: str) -> Sequence[str]: """Breaks path into sequence of identifiers""" + # TODO: this is no longer needed if we modify all naming convention to do not contract + # underscores then also normalize_path will not be needed return [ident for ident in path.split(self.PATH_SEPARATOR) if ident.strip()] def normalize_path(self, path: str) -> str: diff --git a/dlt/common/normalizers/typing.py b/dlt/common/normalizers/typing.py index 9840f3a4d2..16ad097fde 100644 --- a/dlt/common/normalizers/typing.py +++ b/dlt/common/normalizers/typing.py @@ -18,5 +18,7 @@ class TJSONNormalizer(TypedDict, total=False): class TNormalizersConfig(TypedDict, total=False): names: str allow_identifier_change_on_table_with_data: Optional[bool] + use_break_path_on_normalize: Optional[bool] + """Post 1.4.0 to allow table and column names that contain table separators""" detections: Optional[List[str]] json: TJSONNormalizer diff --git a/dlt/common/reflection/utils.py b/dlt/common/reflection/utils.py index cbf38a7327..c612c5a4f1 100644 --- a/dlt/common/reflection/utils.py +++ b/dlt/common/reflection/utils.py @@ -1,7 +1,13 @@ import ast import inspect -import astunparse -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, Callable + +try: + import astunparse + + ast_unparse: Callable[[ast.AST], str] = astunparse.unparse +except ImportError: + ast_unparse = ast.unparse # type: ignore[attr-defined, unused-ignore] from dlt.common.typing import AnyFun @@ -25,7 +31,7 @@ def get_literal_defaults(node: Union[ast.FunctionDef, 
ast.AsyncFunctionDef]) -> literal_defaults: Dict[str, str] = {} for arg, default in zip(reversed(args), reversed(defaults)): if default: -            literal_defaults[str(arg.arg)] = astunparse.unparse(default).strip() +            literal_defaults[str(arg.arg)] = ast_unparse(default).strip() return literal_defaults @@ -99,7 +105,7 @@ def rewrite_python_script( script_lines.append(source_script_lines[last_line][last_offset : node.col_offset]) # replace node value -        script_lines.append(astunparse.unparse(t_value).strip()) +        script_lines.append(ast_unparse(t_value).strip()) last_line = node.end_lineno - 1 last_offset = node.end_col_offset diff --git a/dlt/common/schema/configuration.py b/dlt/common/schema/configuration.py index e64dd57494..72f79026da 100644 --- a/dlt/common/schema/configuration.py +++ b/dlt/common/schema/configuration.py @@ -14,3 +14,5 @@ class SchemaConfiguration(BaseConfiguration): naming: Optional[TNamingConventionReferenceArg] = None  # Union[str, NamingConvention] json_normalizer: Optional[DictStrAny] = None allow_identifier_change_on_table_with_data: Optional[bool] = None +    use_break_path_on_normalize: Optional[bool] = None +    """Post 1.4.0 to allow table and column names that contain table separators""" diff --git a/dlt/common/schema/migrations.py b/dlt/common/schema/migrations.py index d9e758f204..06eb35c0f6 100644 --- a/dlt/common/schema/migrations.py +++ b/dlt/common/schema/migrations.py @@ -29,13 +29,13 @@ def migrate_schema(schema_dict: DictStrAny, from_engine: int, to_engine: int) -> schema_dict["excludes"] = [] from_engine = 2 if from_engine == 2 and to_engine > 2: -        from dlt.common.schema.normalizers import import_normalizers, explicit_normalizers +        from dlt.common.schema.normalizers import import_normalizers, configured_normalizers # current version of the schema current = cast(TStoredSchema, schema_dict) # add default normalizers and root hash propagation # use explicit None to get default settings. ignore any naming conventions -        normalizers = explicit_normalizers(naming=None, json_normalizer=None) +        normalizers = configured_normalizers(naming=None, json_normalizer=None) current["normalizers"], _, _ = import_normalizers(normalizers, normalizers) current["normalizers"]["json"]["config"] = { "propagation": {"root": {"_dlt_id": "_dlt_root_id"}} @@ -169,6 +169,9 @@ def migrate_filters(group: str, filters: List[str]) -> None: json_config.pop("generate_dlt_id", None) from_engine = 10 +    if from_engine == 10 and to_engine > 10: +        schema_dict["normalizers"]["use_break_path_on_normalize"] = False +        from_engine = 11 schema_dict["engine_version"] = from_engine if from_engine != to_engine: diff --git a/dlt/common/schema/normalizers.py b/dlt/common/schema/normalizers.py index 9b2a37e708..8f42e90596 100644 --- a/dlt/common/schema/normalizers.py +++ b/dlt/common/schema/normalizers.py @@ -40,13 +40,14 @@ def _section_for_schema(kwargs: Dict[str, Any]) -> Tuple[str, ...]: @with_config(spec=SchemaConfiguration, sections=_section_for_schema)  # type: ignore[call-overload] -def explicit_normalizers( +def configured_normalizers( naming: TNamingConventionReferenceArg = dlt.config.value, json_normalizer: TJSONNormalizer = dlt.config.value, allow_identifier_change_on_table_with_data: bool = None, +    use_break_path_on_normalize: Optional[bool] = None, schema_name: Optional[str] = None, ) -> TNormalizersConfig: -    """Gets explicitly configured normalizers without any defaults or capabilities injection. 
If `naming` is a module or a type it will get converted into string form via import. If `schema_name` is present, a section ("sources", schema_name, "schema") is used to inject the config @@ -57,6 +58,8 @@ def explicit_normalizers( norm_conf["allow_identifier_change_on_table_with_data"] = ( allow_identifier_change_on_table_with_data ) + if use_break_path_on_normalize is not None: + norm_conf["use_break_path_on_normalize"] = use_break_path_on_normalize return norm_conf diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index 0dbeda93cf..d6031a08fa 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -57,7 +57,7 @@ SchemaCorruptedException, TableIdentifiersFrozen, ) -from dlt.common.schema.normalizers import import_normalizers, explicit_normalizers +from dlt.common.schema.normalizers import import_normalizers, configured_normalizers from dlt.common.schema.exceptions import DataValidationError from dlt.common.validation import validate_dict @@ -439,7 +439,8 @@ def update_schema(self, schema: "Schema") -> None: """Updates this schema from an incoming schema. Normalizes identifiers after updating normalizers.""" # pass normalizer config self._settings = deepcopy(schema.settings) - self._configure_normalizers(schema._normalizers_config) + # make shallow copy of normalizer settings + self._configure_normalizers(copy(schema._normalizers_config)) self._compile_settings() # update all tables for table in schema.tables.values(): @@ -753,7 +754,7 @@ def update_normalizers(self) -> None: Default hints, preferred data types and normalize configs (ie. column propagation) are normalized as well. Regexes are included as long as textual parts can be extracted from an expression. """ - self._configure_normalizers(explicit_normalizers(schema_name=self._schema_name)) + self._configure_normalizers(configured_normalizers(schema_name=self._schema_name)) self._compile_settings() def will_update_normalizers(self) -> bool: @@ -761,7 +762,7 @@ def will_update_normalizers(self) -> bool: # import desired modules _, to_naming, _ = import_normalizers( - explicit_normalizers(schema_name=self._schema_name), self._normalizers_config + configured_normalizers(schema_name=self._schema_name), self._normalizers_config ) return type(to_naming) is not type(self.naming) # noqa @@ -1106,13 +1107,13 @@ def _verify_identifiers(table: TTableSchema, norm_table: TTableSchema) -> None: else: return self._schema_tables - def _renormalize_schema_identifiers( + def _replace_and_apply_naming( self, normalizers_config: TNormalizersConfig, to_naming: NamingConvention, from_naming: NamingConvention, ) -> None: - """Normalizes all identifiers in the schema in place""" + """Normalizes all identifiers in the schema in place according to `to_naming`""" self._schema_tables = self._verify_update_normalizers( normalizers_config, to_naming, from_naming ) @@ -1140,10 +1141,19 @@ def _renormalize_schema_identifiers( def _configure_normalizers(self, explicit_normalizers: TNormalizersConfig) -> None: """Gets naming and item normalizer from schema yaml, config providers and destination capabilities and applies them to schema.""" + # preserve current schema settings if not explicitly set in `explicit_normalizers` + if explicit_normalizers and self._normalizers_config: + for prop_ in [ + "use_break_path_on_normalize", + "allow_identifier_change_on_table_with_data", + ]: + if prop_ in self._normalizers_config and prop_ not in explicit_normalizers: + explicit_normalizers[prop_] = self._normalizers_config[prop_] # 
type: ignore[literal-required] + normalizers_config, to_naming, item_normalizer_class = import_normalizers( explicit_normalizers, self._normalizers_config ) - self._renormalize_schema_identifiers(normalizers_config, to_naming, self.naming) + self._replace_and_apply_naming(normalizers_config, to_naming, self.naming) # data item normalization function self.data_item_normalizer = item_normalizer_class(self) self.data_item_normalizer.extend_schema() @@ -1174,7 +1184,7 @@ def _reset_schema(self, name: str, normalizers: TNormalizersConfig = None) -> No self._add_standard_hints() # configure normalizers, including custom config if present if not normalizers: - normalizers = explicit_normalizers(schema_name=self._schema_name) + normalizers = configured_normalizers(schema_name=self._schema_name) self._configure_normalizers(normalizers) # add version tables self._add_standard_tables() diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index c8f5de03ed..6f5d6213c9 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -28,7 +28,7 @@ # current version of schema engine -SCHEMA_ENGINE_VERSION = 10 +SCHEMA_ENGINE_VERSION = 11 # dlt tables VERSION_TABLE_NAME = "_dlt_version" diff --git a/dlt/common/utils.py b/dlt/common/utils.py index 3ff23c9bae..58e1dbd824 100644 --- a/dlt/common/utils.py +++ b/dlt/common/utils.py @@ -647,3 +647,8 @@ def is_typeerror_due_to_wrong_call(exc: Exception, func: AnyFun) -> bool: func_name = func.__name__ message = str(exc) return message.__contains__(f"{func_name}()") + + +removeprefix = getattr( + str, "removeprefix", lambda s_, p_: s_[len(p_) :] if s_.startswith(p_) else s_ +) diff --git a/dlt/destinations/dataset.py b/dlt/destinations/dataset.py index 411c876c19..27a7f5a7af 100644 --- a/dlt/destinations/dataset.py +++ b/dlt/destinations/dataset.py @@ -3,12 +3,8 @@ from contextlib import contextmanager from dlt import version - from dlt.common.json import json - -from dlt.common.normalizers.naming.naming import NamingConvention from dlt.common.exceptions import MissingDependencyException - from dlt.common.destination import AnyDestination from dlt.common.destination.reference import ( SupportsReadableRelation, @@ -109,7 +105,7 @@ def query(self) -> Any: return self._provided_query table_name = self.sql_client.make_qualified_table_name( - self.schema.naming.normalize_path(self._table_name) + self.schema.naming.normalize_tables_path(self._table_name) ) maybe_limit_clause_1 = "" diff --git a/dlt/destinations/impl/clickhouse/sql_client.py b/dlt/destinations/impl/clickhouse/sql_client.py index 00f35da082..a6c4ee0458 100644 --- a/dlt/destinations/impl/clickhouse/sql_client.py +++ b/dlt/destinations/impl/clickhouse/sql_client.py @@ -28,6 +28,7 @@ from dlt.common import logger from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.typing import DictStrAny +from dlt.common.utils import removeprefix from dlt.destinations.exceptions import ( DatabaseUndefinedRelation, @@ -88,9 +89,8 @@ def has_dataset(self) -> bool: sentinel_table = self.config.dataset_sentinel_table_name all_ds_tables = self._list_tables() if self.dataset_name: - return sentinel_table in [ - t.split(self.config.dataset_table_separator)[1] for t in all_ds_tables - ] + prefix = self.dataset_name + self.config.dataset_table_separator + return sentinel_table in [removeprefix(t, prefix) for t in all_ds_tables] else: # if no dataset specified we look for sentinel table return sentinel_table in all_ds_tables diff --git 
a/dlt/destinations/impl/filesystem/filesystem.py b/dlt/destinations/impl/filesystem/filesystem.py index 0cf63b3ac9..1739c87fb3 100644 --- a/dlt/destinations/impl/filesystem/filesystem.py +++ b/dlt/destinations/impl/filesystem/filesystem.py @@ -37,7 +37,7 @@ TPipelineStateDoc, load_package as current_load_package, ) -from dlt.destinations.sql_client import DBApiCursor, WithSqlClient, SqlClientBase +from dlt.destinations.sql_client import WithSqlClient, SqlClientBase from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.destination.reference import ( FollowupJobRequest, @@ -63,7 +63,6 @@ from dlt.destinations.impl.filesystem.configuration import FilesystemDestinationClientConfiguration from dlt.destinations import path_utils from dlt.destinations.fs_client import FSClientBase -from dlt.destinations.dataset import ReadableDBAPIDataset from dlt.destinations.utils import verify_schema_merge_disposition INIT_FILE_NAME = "init" diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 41d3035a9f..03f8a31462 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -18,6 +18,8 @@ TTableSchemaColumns, TPartialTableSchema, ) +from dlt.common.normalizers.json import helpers as normalize_helpers + from dlt.extract.hints import HintsMeta, TResourceHints from dlt.extract.resource import DltResource from dlt.extract.items import DataItemWithMeta, TableNameMeta @@ -141,7 +143,9 @@ def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> No self._write_to_dynamic_table(resource, items, meta) def write_empty_items_file(self, table_name: str) -> None: - table_name = self.naming.normalize_table_identifier(table_name) + table_name = normalize_helpers.normalize_table_identifier( + self.schema, self.naming, table_name + ) self.item_storage.write_empty_items_file(self.load_id, self.schema.name, table_name, None) def _get_static_table_name(self, resource: DltResource, meta: Any) -> Optional[str]: @@ -151,10 +155,12 @@ def _get_static_table_name(self, resource: DltResource, meta: Any) -> Optional[s table_name = meta.table_name else: table_name = resource.table_name # type: ignore[assignment] - return self.naming.normalize_table_identifier(table_name) + return normalize_helpers.normalize_table_identifier(self.schema, self.naming, table_name) def _get_dynamic_table_name(self, resource: DltResource, item: TDataItem) -> str: - return self.naming.normalize_table_identifier(resource._table_name_hint_fun(item)) + return normalize_helpers.normalize_table_identifier( + self.schema, self.naming, resource._table_name_hint_fun(item) + ) def _write_item( self, @@ -322,7 +328,7 @@ def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> No ) for tbl in ( ( - # 1. Convert pandas frame(s) to arrow Table + # 1. 
Convert pandas frame(s) to arrow Table, remove indexes because we store pandas_to_arrow(item) if (pandas and isinstance(item, pandas.DataFrame)) else item diff --git a/dlt/normalize/worker.py b/dlt/normalize/worker.py index 53a856f7d0..5eccdf5433 100644 --- a/dlt/normalize/worker.py +++ b/dlt/normalize/worker.py @@ -20,6 +20,7 @@ ParsedLoadJobFileName, ) from dlt.common.schema import TSchemaUpdate, Schema +from dlt.common.normalizers.json import helpers as normalize_helpers from dlt.normalize.configuration import NormalizeConfiguration from dlt.normalize.exceptions import NormalizeJobFailed @@ -218,8 +219,8 @@ def _gather_metrics_and_close( parsed_file_name = ParsedLoadJobFileName.parse(extracted_items_file) # normalize table name in case the normalization changed # NOTE: this is the best we can do, until a full lineage information is in the schema - root_table_name = schema.naming.normalize_table_identifier( - parsed_file_name.table_name + root_table_name = normalize_helpers.normalize_table_identifier( + schema, schema.naming, parsed_file_name.table_name ) root_tables.add(root_table_name) root_table = stored_schema["tables"].get(root_table_name, {"name": root_table_name}) diff --git a/dlt/reflection/script_visitor.py b/dlt/reflection/script_visitor.py index f4a5569ed0..c49fed20ab 100644 --- a/dlt/reflection/script_visitor.py +++ b/dlt/reflection/script_visitor.py @@ -1,10 +1,9 @@ import inspect import ast -import astunparse from ast import NodeVisitor from typing import Any, Dict, List -from dlt.common.reflection.utils import find_outer_func_def +from dlt.common.reflection.utils import find_outer_func_def, ast_unparse import dlt.reflection.names as n @@ -68,9 +67,9 @@ def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: for deco in node.decorator_list: # decorators can be function calls, attributes or names if isinstance(deco, (ast.Name, ast.Attribute)): - alias_name = astunparse.unparse(deco).strip() + alias_name = ast_unparse(deco).strip() elif isinstance(deco, ast.Call): - alias_name = astunparse.unparse(deco.func).strip() + alias_name = ast_unparse(deco.func).strip() else: raise ValueError( self.source_segment(deco), type(deco), "Unknown decorator form" @@ -87,7 +86,7 @@ def visit_FunctionDef(self, node: ast.FunctionDef) -> Any: def visit_Call(self, node: ast.Call) -> Any: if self._curr_pass == 2: # check if this is a call to any of known functions - alias_name = astunparse.unparse(node.func).strip() + alias_name = ast_unparse(node.func).strip() fn = self.func_aliases.get(alias_name) if not fn: # try a fallback to "run" function that may be called on pipeline or source diff --git a/dlt/sources/sql_database/arrow_helpers.py b/dlt/sources/sql_database/arrow_helpers.py index 1f72205a2a..1de9dffc87 100644 --- a/dlt/sources/sql_database/arrow_helpers.py +++ b/dlt/sources/sql_database/arrow_helpers.py @@ -4,9 +4,6 @@ from dlt.common.configuration import with_config from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.libs.pyarrow import ( - row_tuples_to_arrow as _row_tuples_to_arrow, -) @with_config @@ -20,6 +17,8 @@ def row_tuples_to_arrow( is always the case if run within the pipeline. This will generate arrow schema compatible with the destination. 
Otherwise generic capabilities are used """ +    from dlt.common.libs.pyarrow import row_tuples_to_arrow as _row_tuples_to_arrow + return _row_tuples_to_arrow( rows, caps or DestinationCapabilitiesContext.generic_capabilities(), columns, tz ) diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index aa0a5fe68a..9b243b9429 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -181,7 +181,7 @@ bucket_url = "abfss://@.dfs.core.windows.n You can use `az`, `abfss`, `azure` and `abfs` url schemes. -If you need to use a custom host to account your storage account you can set it up like below: +If you need to use a custom host for your storage account, you can set it up like below: ```toml [destination.filesystem.credentials] # The storage account name is always required diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md b/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md index 11d4382a22..fa5cf7b128 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md @@ -39,6 +39,8 @@ pipeline = dlt.pipeline("orders_pipeline", destination="snowflake") pipeline.run(df, table_name="orders") ``` +Note that Pandas indexes are not saved by default (starting from `dlt` version 1.4.1). If for some reason you need the index in the destination, +use `Table.from_pandas` with `preserve_index` set to True to explicitly convert the dataframe into an arrow table. A `pyarrow` table can be loaded in the same way: diff --git a/docs/website/docs/general-usage/naming-convention.md b/docs/website/docs/general-usage/naming-convention.md index f1766d1797..c10ac3e3d0 100644 --- a/docs/website/docs/general-usage/naming-convention.md +++ b/docs/website/docs/general-usage/naming-convention.md @@ -69,6 +69,45 @@ Note that many destinations are exclusively case-insensitive, of which some pres ### Identifier shortening Identifier shortening happens during normalization. `dlt` takes the maximum length of the identifier from the destination capabilities and will trim the identifiers that are too long. The default shortening behavior generates short deterministic hashes of the source identifiers and places them in the middle of the destination identifier. This (with a high probability) avoids shortened identifier collisions. +### Compound (flattened) identifiers +`dlt` combines several identifiers in order to name nested tables and flattened columns. For example: +```json +{ + "column": + { + "value": 1 + } +} +``` +generates the flattened column name `column__value`, where `__` is the path separator (in **snake case**). Each component in the combined identifier is normalized +separately and shortened as a whole. + +:::note +A combined identifier is also a valid single identifier. Starting from +`dlt` versions above 1.4.0, normalization is fully idempotent and a normalized +`column__value` will still be `column__value`. +::: + +:::caution +Previously, double underscores were contracted into a single underscore. That +prevented using data loaded by `dlt` as a data source without identifier modifications. 
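+As a minimal illustration of the new idempotent behavior (a sketch only, assuming the snake case convention class exposed at `dlt.common.normalizers.naming.snake_case`; not an official documented example):
+```py
+from dlt.common.normalizers.naming.snake_case import NamingConvention
+
+naming = NamingConvention()
+# post-1.4.0: the path separator is preserved, so an already normalized name round-trips unchanged
+assert naming.normalize_path("column__value") == "column__value"
+```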
`dlt` maintains backward compatibility for version >1.4.0 as follows: + +* All schemas stored locally or at destination will be migrated to backward compatible mode by setting a flag `use_break_path_on_normalize` ie.: +```yaml +normalizers: + names: dlt.common.normalizers.names.snake_case + use_break_path_on_normalize: true + json: + module: dlt.common.normalizers.json.relational +``` +* Backward compatible behavior may be explicitly enabled by setting +`SCHEMA__USE_BREAK_PATH_ON_NORMALIZE` to `TRUE` or via `config.toml`: +```toml +[schema] +use_break_path_on_normalize=true +``` +::: + ### 🚧 [WIP] Name convention changes are lossy `dlt` does not store the source identifiers in the schema so when the naming convention changes (or we increase the maximum identifier length), it is not able to generate a fully correct set of new identifiers. Instead, it will re-normalize already normalized identifiers. We are currently working to store the full identifier lineage - source identifiers will be stored and mapped to the destination in the schema. diff --git a/mypy.ini b/mypy.ini index eee4db6126..769e84b13a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -134,4 +134,4 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-time_machine.*] -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True diff --git a/poetry.lock b/poetry.lock index 9ae26bd04c..732ba0e219 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "about-time" @@ -13,13 +13,13 @@ files = [ [[package]] name = "adlfs" -version = "2024.4.1" +version = "2024.7.0" description = "Access Azure Datalake Gen1 with fsspec and dask" optional = true python-versions = ">=3.8" files = [ - {file = "adlfs-2024.4.1-py3-none-any.whl", hash = "sha256:acea94612ddacaa34ea8c6babcc95b8da6982f930cdade7a86fbd17382403e16"}, - {file = "adlfs-2024.4.1.tar.gz", hash = "sha256:75530a45447f358ae53c5c39c298b8d966dae684be84db899f63b94cd96fc000"}, + {file = "adlfs-2024.7.0-py3-none-any.whl", hash = "sha256:2005c8e124fda3948f2a6abb2dbebb2c936d2d821acaca6afd61932edfa9bc07"}, + {file = "adlfs-2024.7.0.tar.gz", hash = "sha256:106995b91f0eb5e775bcd5957d180d9a14faef3271a063b1f65c66fd5ab05ddf"}, ] [package.dependencies] @@ -3900,106 +3900,6 @@ files = [ {file = "google_re2-1.1-4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f4d4f0823e8b2f6952a145295b1ff25245ce9bb136aff6fe86452e507d4c1dd"}, {file = "google_re2-1.1-4-cp39-cp39-win32.whl", hash = "sha256:1afae56b2a07bb48cfcfefaa15ed85bae26a68f5dc7f9e128e6e6ea36914e847"}, {file = "google_re2-1.1-4-cp39-cp39-win_amd64.whl", hash = "sha256:aa7d6d05911ab9c8adbf3c225a7a120ab50fd2784ac48f2f0d140c0b7afc2b55"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:222fc2ee0e40522de0b21ad3bc90ab8983be3bf3cec3d349c80d76c8bb1a4beb"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d4763b0b9195b72132a4e7de8e5a9bf1f05542f442a9115aa27cfc2a8004f581"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:209649da10c9d4a93d8a4d100ecbf9cc3b0252169426bec3e8b4ad7e57d600cf"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:68813aa333c1604a2df4a495b2a6ed065d7c8aebf26cc7e7abb5a6835d08353c"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_arm64.whl", hash = 
"sha256:370a23ec775ad14e9d1e71474d56f381224dcf3e72b15d8ca7b4ad7dd9cd5853"}, - {file = "google_re2-1.1-5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:14664a66a3ddf6bc9e56f401bf029db2d169982c53eff3f5876399104df0e9a6"}, - {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea3722cc4932cbcebd553b69dce1b4a73572823cff4e6a244f1c855da21d511"}, - {file = "google_re2-1.1-5-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e14bb264c40fd7c627ef5678e295370cd6ba95ca71d835798b6e37502fc4c690"}, - {file = "google_re2-1.1-5-cp310-cp310-win32.whl", hash = "sha256:39512cd0151ea4b3969c992579c79b423018b464624ae955be685fc07d94556c"}, - {file = "google_re2-1.1-5-cp310-cp310-win_amd64.whl", hash = "sha256:ac66537aa3bc5504320d922b73156909e3c2b6da19739c866502f7827b3f9fdf"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5b5ea68d54890c9edb1b930dcb2658819354e5d3f2201f811798bbc0a142c2b4"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:33443511b6b83c35242370908efe2e8e1e7cae749c766b2b247bf30e8616066c"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:413d77bdd5ba0bfcada428b4c146e87707452ec50a4091ec8e8ba1413d7e0619"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:5171686e43304996a34baa2abcee6f28b169806d0e583c16d55e5656b092a414"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b284db130283771558e31a02d8eb8fb756156ab98ce80035ae2e9e3a5f307c4"}, - {file = "google_re2-1.1-5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:296e6aed0b169648dc4b870ff47bd34c702a32600adb9926154569ef51033f47"}, - {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38d50e68ead374160b1e656bbb5d101f0b95fb4cc57f4a5c12100155001480c5"}, - {file = "google_re2-1.1-5-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a0416a35921e5041758948bcb882456916f22845f66a93bc25070ef7262b72a"}, - {file = "google_re2-1.1-5-cp311-cp311-win32.whl", hash = "sha256:a1d59568bbb5de5dd56dd6cdc79907db26cce63eb4429260300c65f43469e3e7"}, - {file = "google_re2-1.1-5-cp311-cp311-win_amd64.whl", hash = "sha256:72f5a2f179648b8358737b2b493549370debd7d389884a54d331619b285514e3"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:cbc72c45937b1dc5acac3560eb1720007dccca7c9879138ff874c7f6baf96005"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5fadd1417fbef7235fa9453dba4eb102e6e7d94b1e4c99d5fa3dd4e288d0d2ae"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:040f85c63cc02696485b59b187a5ef044abe2f99b92b4fb399de40b7d2904ccc"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:64e3b975ee6d9bbb2420494e41f929c1a0de4bcc16d86619ab7a87f6ea80d6bd"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8ee370413e00f4d828eaed0e83b8af84d7a72e8ee4f4bd5d3078bc741dfc430a"}, - {file = "google_re2-1.1-5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:5b89383001079323f693ba592d7aad789d7a02e75adb5d3368d92b300f5963fd"}, - {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:63cb4fdfbbda16ae31b41a6388ea621510db82feb8217a74bf36552ecfcd50ad"}, - {file = "google_re2-1.1-5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:9ebedd84ae8be10b7a71a16162376fd67a2386fe6361ef88c622dcf7fd679daf"}, - {file = "google_re2-1.1-5-cp312-cp312-win32.whl", hash = "sha256:c8e22d1692bc2c81173330c721aff53e47ffd3c4403ff0cd9d91adfd255dd150"}, - {file = "google_re2-1.1-5-cp312-cp312-win_amd64.whl", hash = "sha256:5197a6af438bb8c4abda0bbe9c4fbd6c27c159855b211098b29d51b73e4cbcf6"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b6727e0b98417e114b92688ad2aa256102ece51f29b743db3d831df53faf1ce3"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:711e2b6417eb579c61a4951029d844f6b95b9b373b213232efd413659889a363"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:71ae8b3df22c5c154c8af0f0e99d234a450ef1644393bc2d7f53fc8c0a1e111c"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:94a04e214bc521a3807c217d50cf099bbdd0c0a80d2d996c0741dbb995b5f49f"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:a770f75358508a9110c81a1257721f70c15d9bb592a2fb5c25ecbd13566e52a5"}, - {file = "google_re2-1.1-5-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:07c9133357f7e0b17c6694d5dcb82e0371f695d7c25faef2ff8117ef375343ff"}, - {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:204ca6b1cf2021548f4a9c29ac015e0a4ab0a7b6582bf2183d838132b60c8fda"}, - {file = "google_re2-1.1-5-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0b95857c2c654f419ca684ec38c9c3325c24e6ba7d11910a5110775a557bb18"}, - {file = "google_re2-1.1-5-cp38-cp38-win32.whl", hash = "sha256:347ac770e091a0364e822220f8d26ab53e6fdcdeaec635052000845c5a3fb869"}, - {file = "google_re2-1.1-5-cp38-cp38-win_amd64.whl", hash = "sha256:ec32bb6de7ffb112a07d210cf9f797b7600645c2d5910703fa07f456dd2150e0"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:eb5adf89060f81c5ff26c28e261e6b4997530a923a6093c9726b8dec02a9a326"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a22630c9dd9ceb41ca4316bccba2643a8b1d5c198f21c00ed5b50a94313aaf10"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:544dc17fcc2d43ec05f317366375796351dec44058e1164e03c3f7d050284d58"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:19710af5ea88751c7768575b23765ce0dfef7324d2539de576f75cdc319d6654"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:f82995a205e08ad896f4bd5ce4847c834fab877e1772a44e5f262a647d8a1dec"}, - {file = "google_re2-1.1-5-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:63533c4d58da9dc4bc040250f1f52b089911699f0368e0e6e15f996387a984ed"}, - {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79e00fcf0cb04ea35a22b9014712d448725ce4ddc9f08cc818322566176ca4b0"}, - {file = "google_re2-1.1-5-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bc41afcefee2da6c4ed883a93d7f527c4b960cd1d26bbb0020a7b8c2d341a60a"}, - {file = "google_re2-1.1-5-cp39-cp39-win32.whl", hash = "sha256:486730b5e1f1c31b0abc6d80abe174ce4f1188fe17d1b50698f2bf79dc6e44be"}, - {file = "google_re2-1.1-5-cp39-cp39-win_amd64.whl", hash = "sha256:4de637ca328f1d23209e80967d1b987d6b352cd01b3a52a84b4d742c69c3da6c"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:621e9c199d1ff0fdb2a068ad450111a84b3bf14f96dfe5a8a7a0deae5f3f4cce"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_12_0_x86_64.whl", hash = 
"sha256:220acd31e7dde95373f97c3d1f3b3bd2532b38936af28b1917ee265d25bebbf4"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:db34e1098d164f76251a6ece30e8f0ddfd65bb658619f48613ce71acb3f9cbdb"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:5152bac41d8073977582f06257219541d0fc46ad99b0bbf30e8f60198a43b08c"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:6191294799e373ee1735af91f55abd23b786bdfd270768a690d9d55af9ea1b0d"}, - {file = "google_re2-1.1-6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:070cbafbb4fecbb02e98feb28a1eb292fb880f434d531f38cc33ee314b521f1f"}, - {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8437d078b405a59a576cbed544490fe041140f64411f2d91012e8ec05ab8bf86"}, - {file = "google_re2-1.1-6-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f00f9a9af8896040e37896d9b9fc409ad4979f1ddd85bb188694a7d95ddd1164"}, - {file = "google_re2-1.1-6-cp310-cp310-win32.whl", hash = "sha256:df26345f229a898b4fd3cafd5f82259869388cee6268fc35af16a8e2293dd4e5"}, - {file = "google_re2-1.1-6-cp310-cp310-win_amd64.whl", hash = "sha256:3665d08262c57c9b28a5bdeb88632ad792c4e5f417e5645901695ab2624f5059"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b26b869d8aa1d8fe67c42836bf3416bb72f444528ee2431cfb59c0d3e02c6ce3"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:41fd4486c57dea4f222a6bb7f1ff79accf76676a73bdb8da0fcbd5ba73f8da71"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0ee378e2e74e25960070c338c28192377c4dd41e7f4608f2688064bd2badc41e"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:a00cdbf662693367b36d075b29feb649fd7ee1b617cf84f85f2deebeda25fc64"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4c09455014217a41499432b8c8f792f25f3df0ea2982203c3a8c8ca0e7895e69"}, - {file = "google_re2-1.1-6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6501717909185327935c7945e23bb5aa8fc7b6f237b45fe3647fa36148662158"}, - {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3510b04790355f199e7861c29234081900e1e1cbf2d1484da48aa0ba6d7356ab"}, - {file = "google_re2-1.1-6-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8c0e64c187ca406764f9e9ad6e750d62e69ed8f75bf2e865d0bfbc03b642361c"}, - {file = "google_re2-1.1-6-cp311-cp311-win32.whl", hash = "sha256:2a199132350542b0de0f31acbb3ca87c3a90895d1d6e5235f7792bb0af02e523"}, - {file = "google_re2-1.1-6-cp311-cp311-win_amd64.whl", hash = "sha256:83bdac8ceaece8a6db082ea3a8ba6a99a2a1ee7e9f01a9d6d50f79c6f251a01d"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:81985ff894cd45ab5a73025922ac28c0707759db8171dd2f2cc7a0e856b6b5ad"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:5635af26065e6b45456ccbea08674ae2ab62494008d9202df628df3b267bc095"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:813b6f04de79f4a8fdfe05e2cb33e0ccb40fe75d30ba441d519168f9d958bd54"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:5ec2f5332ad4fd232c3f2d6748c2c7845ccb66156a87df73abcc07f895d62ead"}, - {file = "google_re2-1.1-6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5a687b3b32a6cbb731647393b7c4e3fde244aa557f647df124ff83fb9b93e170"}, - {file = 
"google_re2-1.1-6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:39a62f9b3db5d3021a09a47f5b91708b64a0580193e5352751eb0c689e4ad3d7"}, - {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ca0f0b45d4a1709cbf5d21f355e5809ac238f1ee594625a1e5ffa9ff7a09eb2b"}, - {file = "google_re2-1.1-6-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a64b3796a7a616c7861247bd061c9a836b5caf0d5963e5ea8022125601cf7b09"}, - {file = "google_re2-1.1-6-cp312-cp312-win32.whl", hash = "sha256:32783b9cb88469ba4cd9472d459fe4865280a6b1acdad4480a7b5081144c4eb7"}, - {file = "google_re2-1.1-6-cp312-cp312-win_amd64.whl", hash = "sha256:259ff3fd2d39035b9cbcbf375995f83fa5d9e6a0c5b94406ff1cc168ed41d6c6"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:e4711bcffe190acd29104d8ecfea0c0e42b754837de3fb8aad96e6cc3c613cdc"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:4d081cce43f39c2e813fe5990e1e378cbdb579d3f66ded5bade96130269ffd75"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:4f123b54d48450d2d6b14d8fad38e930fb65b5b84f1b022c10f2913bd956f5b5"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:e1928b304a2b591a28eb3175f9db7f17c40c12cf2d4ec2a85fdf1cc9c073ff91"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:3a69f76146166aec1173003c1f547931bdf288c6b135fda0020468492ac4149f"}, - {file = "google_re2-1.1-6-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:fc08c388f4ebbbca345e84a0c56362180d33d11cbe9ccfae663e4db88e13751e"}, - {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b057adf38ce4e616486922f2f47fc7d19c827ba0a7f69d540a3664eba2269325"}, - {file = "google_re2-1.1-6-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4138c0b933ab099e96f5d8defce4486f7dfd480ecaf7f221f2409f28022ccbc5"}, - {file = "google_re2-1.1-6-cp38-cp38-win32.whl", hash = "sha256:9693e45b37b504634b1abbf1ee979471ac6a70a0035954592af616306ab05dd6"}, - {file = "google_re2-1.1-6-cp38-cp38-win_amd64.whl", hash = "sha256:5674d437baba0ea287a5a7f8f81f24265d6ae8f8c09384e2ef7b6f84b40a7826"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7783137cb2e04f458a530c6d0ee9ef114815c1d48b9102f023998c371a3b060e"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a49b7153935e7a303675f4deb5f5d02ab1305adefc436071348706d147c889e0"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:a96a8bb309182090704593c60bdb369a2756b38fe358bbf0d40ddeb99c71769f"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:dff3d4be9f27ef8ec3705eed54f19ef4ab096f5876c15fe011628c69ba3b561c"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:40f818b0b39e26811fa677978112a8108269977fdab2ba0453ac4363c35d9e66"}, - {file = "google_re2-1.1-6-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:8a7e53538cdb40ef4296017acfbb05cab0c19998be7552db1cfb85ba40b171b9"}, - {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ee18e7569fb714e5bb8c42809bf8160738637a5e71ed5a4797757a1fb4dc4de"}, - {file = "google_re2-1.1-6-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1cda4f6d1a7d5b43ea92bc395f23853fba0caf8b1e1efa6e8c48685f912fcb89"}, - {file = "google_re2-1.1-6-cp39-cp39-win32.whl", hash = 
"sha256:6a9cdbdc36a2bf24f897be6a6c85125876dc26fea9eb4247234aec0decbdccfd"}, - {file = "google_re2-1.1-6-cp39-cp39-win_amd64.whl", hash = "sha256:73f646cecfad7cc5b4330b4192c25f2e29730a3b8408e089ffd2078094208196"}, ] [[package]] @@ -10618,4 +10518,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "24e262ce6bb496fad6e587c76bb9ad60a2cc45a00f52e368b59978093e57b77c" +content-hash = "c0607d05ab37a1a6addf3ae7264bf5972cb6ce6e46df1dcdc2da3cff72e5008e" diff --git a/pyproject.toml b/pyproject.toml index 638653ffcf..8afb332422 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dlt" -version = "1.4.1a0" +version = "1.4.1a1" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." authors = ["dltHub Inc. "] maintainers = [ "Marcin Rudolf ", "Adrian Brudaru ", "Anton Burnashev ", "David Scharf " ] @@ -40,7 +40,7 @@ click = ">=7.1" requirements-parser = ">=0.5.0" setuptools = ">=65.6.0" humanize = ">=4.4.0" -astunparse = ">=1.6.3" +astunparse = { "version" = ">=1.6.3", "python" = "<3.9"} gitpython = ">=3.1.29" pytz = ">=2022.6" giturlparse = ">=0.10.0" @@ -89,7 +89,6 @@ alembic = {version = ">1.10.0", optional = true} paramiko = {version = ">=3.3.0", optional = true} sqlglot = {version = ">=20.0.0", optional = true} db-dtypes = { version = ">=1.2.0", optional = true } -aiohttp = { version = ">=3.9", optional = true } [tool.poetry.extras] gcp = ["grpcio", "google-cloud-bigquery", "db-dtypes", "gcsfs"] diff --git a/tests/common/cases/schemas/eth/ethereum_schema_v11.yml b/tests/common/cases/schemas/eth/ethereum_schema_v11.yml new file mode 100644 index 0000000000..fd6717c614 --- /dev/null +++ b/tests/common/cases/schemas/eth/ethereum_schema_v11.yml @@ -0,0 +1,394 @@ +version: 18 +version_hash: XfkJ8E1tZzG/Sb3lfEZrEVshTMKdB7JpOP2HA7eS6EI= +engine_version: 11 +name: ethereum +tables: + _dlt_loads: + columns: + load_id: + nullable: false + data_type: text + schema_name: + nullable: true + data_type: text + status: + nullable: false + data_type: bigint + inserted_at: + nullable: false + data_type: timestamp + schema_version_hash: + nullable: true + data_type: text + write_disposition: skip + description: Created by DLT. Tracks completed loads + schema_contract: {} + resource: _dlt_loads + _dlt_version: + columns: + version: + nullable: false + data_type: bigint + engine_version: + nullable: false + data_type: bigint + inserted_at: + nullable: false + data_type: timestamp + schema_name: + nullable: false + data_type: text + version_hash: + nullable: false + data_type: text + schema: + nullable: false + data_type: text + write_disposition: skip + description: Created by DLT. 
Tracks schema updates + schema_contract: {} + resource: _dlt_version + blocks: + description: Ethereum blocks + x-annotation: this will be preserved on save + write_disposition: append + filters: + includes: [] + excludes: [] + columns: + _dlt_load_id: + nullable: false + description: load id coming from the extractor + data_type: text + _dlt_id: + nullable: false + unique: true + data_type: text + row_key: true + number: + nullable: false + primary_key: true + data_type: bigint + parent_hash: + nullable: true + data_type: text + hash: + nullable: false + cluster: true + unique: true + data_type: text + base_fee_per_gas: + nullable: false + data_type: wei + difficulty: + nullable: false + data_type: wei + extra_data: + nullable: true + data_type: text + gas_limit: + nullable: false + data_type: bigint + gas_used: + nullable: false + data_type: bigint + logs_bloom: + nullable: true + data_type: binary + miner: + nullable: true + data_type: text + mix_hash: + nullable: true + data_type: text + nonce: + nullable: true + data_type: text + receipts_root: + nullable: true + data_type: text + sha3_uncles: + nullable: true + data_type: text + size: + nullable: true + data_type: bigint + state_root: + nullable: false + data_type: text + timestamp: + nullable: false + unique: true + sort: true + data_type: timestamp + total_difficulty: + nullable: true + data_type: wei + transactions_root: + nullable: false + data_type: text + schema_contract: {} + resource: blocks + x-normalizer: + seen-data: true + blocks__transactions: + columns: + _dlt_id: + nullable: false + unique: true + data_type: text + row_key: true + block_number: + nullable: false + primary_key: true + data_type: bigint + merge_key: true + transaction_index: + nullable: false + primary_key: true + data_type: bigint + hash: + nullable: false + unique: true + data_type: text + block_hash: + nullable: false + cluster: true + data_type: text + block_timestamp: + nullable: false + sort: true + data_type: timestamp + chain_id: + nullable: true + data_type: text + from: + nullable: true + data_type: text + gas: + nullable: true + data_type: bigint + gas_price: + nullable: true + data_type: bigint + input: + nullable: true + data_type: text + max_fee_per_gas: + nullable: true + data_type: wei + max_priority_fee_per_gas: + nullable: true + data_type: wei + nonce: + nullable: true + data_type: bigint + r: + nullable: true + data_type: text + s: + nullable: true + data_type: text + status: + nullable: true + data_type: bigint + to: + nullable: true + data_type: text + type: + nullable: true + data_type: text + v: + nullable: true + data_type: bigint + value: + nullable: false + data_type: wei + eth_value: + nullable: true + data_type: decimal + x-normalizer: + seen-data: true + write_disposition: append + resource: blocks__transactions + blocks__transactions__logs: + columns: + _dlt_id: + nullable: false + unique: true + data_type: text + row_key: true + address: + nullable: false + data_type: text + block_timestamp: + nullable: false + sort: true + data_type: timestamp + block_hash: + nullable: false + cluster: true + data_type: text + block_number: + nullable: false + primary_key: true + merge_key: true + data_type: bigint + transaction_index: + nullable: false + primary_key: true + merge_key: true + data_type: bigint + log_index: + nullable: false + primary_key: true + data_type: bigint + data: + nullable: true + data_type: text + removed: + nullable: true + data_type: bool + transaction_hash: + nullable: false + data_type: text + x-normalizer: 
+ seen-data: true + write_disposition: append + resource: blocks__transactions__logs + blocks__transactions__logs__topics: + parent: blocks__transactions__logs + columns: + _dlt_parent_id: + nullable: false + data_type: text + parent_key: true + _dlt_list_idx: + nullable: false + data_type: bigint + _dlt_id: + nullable: false + unique: true + data_type: text + row_key: true + _dlt_root_id: + nullable: false + root_key: true + data_type: text + value: + nullable: true + data_type: text + x-normalizer: + seen-data: true + blocks__transactions__access_list: + parent: blocks__transactions + columns: + _dlt_parent_id: + nullable: false + data_type: text + parent_key: true + _dlt_list_idx: + nullable: false + data_type: bigint + _dlt_id: + nullable: false + unique: true + data_type: text + row_key: true + _dlt_root_id: + nullable: false + root_key: true + data_type: text + address: + nullable: true + data_type: text + x-normalizer: + seen-data: true + blocks__transactions__access_list__storage_keys: + parent: blocks__transactions__access_list + columns: + _dlt_parent_id: + nullable: false + data_type: text + parent_key: true + _dlt_list_idx: + nullable: false + data_type: bigint + _dlt_id: + nullable: false + unique: true + data_type: text + row_key: true + _dlt_root_id: + nullable: false + root_key: true + data_type: text + value: + nullable: true + data_type: text + x-normalizer: + seen-data: true + blocks__uncles: + parent: blocks + columns: + _dlt_parent_id: + nullable: false + data_type: text + parent_key: true + _dlt_list_idx: + nullable: false + data_type: bigint + _dlt_id: + nullable: false + unique: true + data_type: text + row_key: true + _dlt_root_id: + nullable: false + root_key: true + data_type: text + value: + nullable: true + data_type: text + x-normalizer: + seen-data: true +settings: + default_hints: + not_null: + - re:^_dlt_id$ + - _dlt_root_id + - _dlt_parent_id + - _dlt_list_idx + unique: + - _dlt_id + cluster: + - block_hash + partition: + - block_timestamp + root_key: + - _dlt_root_id + row_key: + - _dlt_id + parent_key: + - _dlt_parent_id + preferred_types: + timestamp: timestamp + block_timestamp: timestamp + schema_contract: {} +normalizers: + names: dlt.common.normalizers.names.snake_case + json: + module: dlt.common.normalizers.json.relational + config: + propagation: + root: + _dlt_id: _dlt_root_id + tables: + blocks: + timestamp: block_timestamp + hash: block_hash +previous_hashes: +- oHfYGTI2GHOxuzwVz6+yvMilXUvHYhxrxkanC2T6MAI= +- C5An8WClbavalXDdNSqXbdI7Swqh/mTWMcwWKCF//EE= +- yjMtV4Zv0IJlfR5DPMwuXxGg8BRhy7E79L26XAHWEGE= + diff --git a/tests/common/cases/schemas/github/issues.schema.json b/tests/common/cases/schemas/github/issues.schema.json index 4c4f5425ae..5a1b0c6f84 100644 --- a/tests/common/cases/schemas/github/issues.schema.json +++ b/tests/common/cases/schemas/github/issues.schema.json @@ -1,1322 +1,1100 @@ { - "version": 2, - "version_hash": "IeCTkq8epwbjSy1O3jdkPPUkTPCt4hLj6RYo8uZ02JI=", - "engine_version": 5, - "name": "event", - "tables": { - "_dlt_version": { - "name": "_dlt_version", - "columns": { - "version": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "version", - "data_type": "bigint", - "nullable": false - }, - "engine_version": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "engine_version", - "data_type": "bigint", - "nullable": false - }, - "inserted_at": { - 
"partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "inserted_at", - "data_type": "timestamp", - "nullable": false - }, - "schema_name": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "schema_name", - "data_type": "text", - "nullable": false - }, - "version_hash": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "version_hash", - "data_type": "text", - "nullable": false - }, - "schema": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "schema", - "data_type": "text", - "nullable": false - } + "version": 3, + "version_hash": "o6olKmaCAQVWDWR4eT4aZ1V/RiH+003516xq7Zrva+Q=", + "engine_version": 11, + "name": "event", + "tables": { + "_dlt_version": { + "columns": { + "version": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": false + }, + "engine_version": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": false + }, + "inserted_at": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "timestamp", + "nullable": false + }, + "schema_name": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false + }, + "version_hash": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false + }, + "schema": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false + } + }, + "write_disposition": "skip", + "description": "Created by DLT. Tracks schema updates", + "schema_contract": {}, + "resource": "_dlt_version" }, - "write_disposition": "skip", - "description": "Created by DLT. 
Tracks schema updates" - }, - "_dlt_loads": { - "name": "_dlt_loads", - "columns": { - "load_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "load_id", - "data_type": "text", - "nullable": false - }, - "schema_name": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "schema_name", - "data_type": "text", - "nullable": true - }, - "status": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "status", - "data_type": "bigint", - "nullable": false - }, - "inserted_at": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "inserted_at", - "data_type": "timestamp", - "nullable": false - } + "_dlt_loads": { + "columns": { + "load_id": { + "data_type": "text", + "nullable": false + }, + "schema_name": { + "data_type": "text", + "nullable": true + }, + "status": { + "data_type": "bigint", + "nullable": false + }, + "inserted_at": { + "data_type": "timestamp", + "nullable": false + }, + "schema_version_hash": { + "data_type": "text", + "nullable": true + } + }, + "write_disposition": "skip", + "resource": "_dlt_loads", + "description": "Created by DLT. Tracks completed loads", + "schema_contract": {} }, - "write_disposition": "skip", - "description": "Created by DLT. Tracks completed loads" - }, - "issues": { - "name": "issues", - "columns": { - "url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "url", - "data_type": "text", - "nullable": true - }, - "repository_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "repository_url", - "data_type": "text", - "nullable": true - }, - "labels_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "labels_url", - "data_type": "text", - "nullable": true - }, - "comments_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "comments_url", - "data_type": "text", - "nullable": true - }, - "events_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "events_url", - "data_type": "text", - "nullable": true - }, - "html_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "html_url", - "data_type": "text", - "nullable": true - }, - "id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "id", - "data_type": "bigint", - "nullable": true - }, - "node_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "node_id", - "data_type": "text", - "nullable": true - }, - "number": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "number", - "data_type": "bigint", - "nullable": true - }, - "title": { - "partition": false, - 
"cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "title", - "data_type": "text", - "nullable": true - }, - "user__login": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__login", - "data_type": "text", - "nullable": true - }, - "user__id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__id", - "data_type": "bigint", - "nullable": true - }, - "user__node_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__node_id", - "data_type": "text", - "nullable": true - }, - "user__avatar_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__avatar_url", - "data_type": "text", - "nullable": true - }, - "user__gravatar_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__gravatar_id", - "data_type": "text", - "nullable": true - }, - "user__url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__url", - "data_type": "text", - "nullable": true - }, - "user__html_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__html_url", - "data_type": "text", - "nullable": true - }, - "user__followers_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__followers_url", - "data_type": "text", - "nullable": true - }, - "user__following_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__following_url", - "data_type": "text", - "nullable": true - }, - "user__gists_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__gists_url", - "data_type": "text", - "nullable": true - }, - "user__starred_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__starred_url", - "data_type": "text", - "nullable": true - }, - "user__subscriptions_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__subscriptions_url", - "data_type": "text", - "nullable": true - }, - "user__organizations_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__organizations_url", - "data_type": "text", - "nullable": true - }, - "user__repos_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__repos_url", - "data_type": "text", - "nullable": true - }, - "user__events_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__events_url", - "data_type": "text", - "nullable": 
true - }, - "user__received_events_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__received_events_url", - "data_type": "text", - "nullable": true - }, - "user__type": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__type", - "data_type": "text", - "nullable": true - }, - "user__site_admin": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "user__site_admin", - "data_type": "bool", - "nullable": true - }, - "state": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "state", - "data_type": "text", - "nullable": true - }, - "locked": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "locked", - "data_type": "bool", - "nullable": true - }, - "assignee__login": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__login", - "data_type": "text", - "nullable": true - }, - "assignee__id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__id", - "data_type": "bigint", - "nullable": true - }, - "assignee__node_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__node_id", - "data_type": "text", - "nullable": true - }, - "assignee__avatar_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__avatar_url", - "data_type": "text", - "nullable": true - }, - "assignee__gravatar_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__gravatar_id", - "data_type": "text", - "nullable": true - }, - "assignee__url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__url", - "data_type": "text", - "nullable": true - }, - "assignee__html_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__html_url", - "data_type": "text", - "nullable": true - }, - "assignee__followers_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__followers_url", - "data_type": "text", - "nullable": true - }, - "assignee__following_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__following_url", - "data_type": "text", - "nullable": true - }, - "assignee__gists_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__gists_url", - "data_type": "text", - "nullable": true - }, - "assignee__starred_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, 
- "foreign_key": false, - "name": "assignee__starred_url", - "data_type": "text", - "nullable": true - }, - "assignee__subscriptions_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__subscriptions_url", - "data_type": "text", - "nullable": true - }, - "assignee__organizations_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__organizations_url", - "data_type": "text", - "nullable": true - }, - "assignee__repos_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__repos_url", - "data_type": "text", - "nullable": true - }, - "assignee__events_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__events_url", - "data_type": "text", - "nullable": true - }, - "assignee__received_events_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__received_events_url", - "data_type": "text", - "nullable": true - }, - "assignee__type": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__type", - "data_type": "text", - "nullable": true - }, - "assignee__site_admin": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "assignee__site_admin", - "data_type": "bool", - "nullable": true - }, - "comments": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "comments", - "data_type": "bigint", - "nullable": true - }, - "created_at": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "created_at", - "data_type": "timestamp", - "nullable": true - }, - "updated_at": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "updated_at", - "data_type": "timestamp", - "nullable": true - }, - "closed_at": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "closed_at", - "data_type": "timestamp", - "nullable": true - }, - "author_association": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "author_association", - "data_type": "text", - "nullable": true - }, - "body": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "body", - "data_type": "text", - "nullable": true - }, - "reactions__url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__url", - "data_type": "text", - "nullable": true - }, - "reactions__total_count": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__total_count", - "data_type": "bigint", - "nullable": true - }, - 
"reactions___1": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions___1", - "data_type": "bigint", - "nullable": true - }, - "reactions__laugh": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__laugh", - "data_type": "bigint", - "nullable": true - }, - "reactions__hooray": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__hooray", - "data_type": "bigint", - "nullable": true - }, - "reactions__confused": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__confused", - "data_type": "bigint", - "nullable": true - }, - "reactions__heart": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__heart", - "data_type": "bigint", - "nullable": true - }, - "reactions__rocket": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__rocket", - "data_type": "bigint", - "nullable": true - }, - "reactions__eyes": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "reactions__eyes", - "data_type": "bigint", - "nullable": true - }, - "timeline_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "timeline_url", - "data_type": "text", - "nullable": true - }, - "state_reason": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "state_reason", - "data_type": "text", - "nullable": true - }, - "_dlt_load_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "_dlt_load_id", - "data_type": "text", - "nullable": false - }, - "_dlt_id": { - "partition": false, - "cluster": false, - "unique": true, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "_dlt_id", - "data_type": "text", - "nullable": false - }, - "draft": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "draft", - "data_type": "bool", - "nullable": true - }, - "pull_request__url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "pull_request__url", - "data_type": "text", - "nullable": true - }, - "pull_request__html_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "pull_request__html_url", - "data_type": "text", - "nullable": true - }, - "pull_request__diff_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "pull_request__diff_url", - "data_type": "text", - "nullable": true - }, - "pull_request__patch_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": 
"pull_request__patch_url", - "data_type": "text", - "nullable": true - }, - "pull_request__merged_at": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "pull_request__merged_at", - "data_type": "timestamp", - "nullable": true - } + "issues": { + "columns": { + "url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "repository_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "labels_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "comments_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "events_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "html_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "node_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "number": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "title": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__login": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "user__node_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__avatar_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__gravatar_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__html_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__followers_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__following_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + 
"user__gists_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__starred_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__subscriptions_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__organizations_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__repos_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__events_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__received_events_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__type": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "user__site_admin": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bool", + "nullable": true + }, + "state": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "locked": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bool", + "nullable": true + }, + "assignee__login": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "assignee__node_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__avatar_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__gravatar_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__html_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__followers_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__following_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__gists_url": { + "partition": false, + "cluster": false, + "unique": 
false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__starred_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__subscriptions_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__organizations_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__repos_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__events_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__received_events_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__type": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "assignee__site_admin": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bool", + "nullable": true + }, + "comments": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "created_at": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "timestamp", + "nullable": true + }, + "updated_at": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "timestamp", + "nullable": true + }, + "closed_at": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "timestamp", + "nullable": true + }, + "author_association": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "body": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "reactions__url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "reactions__total_count": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "reactions___1": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "reactions__laugh": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "reactions__hooray": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "reactions__confused": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + 
"data_type": "bigint", + "nullable": true + }, + "reactions__heart": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "reactions__rocket": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "reactions__eyes": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "timeline_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "state_reason": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "_dlt_load_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false + }, + "_dlt_id": { + "partition": false, + "cluster": false, + "unique": true, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false, + "row_key": true + }, + "draft": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bool", + "nullable": true + }, + "pull_request__url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "pull_request__html_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "pull_request__diff_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "pull_request__patch_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "pull_request__merged_at": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "timestamp", + "nullable": true + } + }, + "write_disposition": "append", + "schema_contract": {}, + "x-normalizer": { + "seen-data": true + }, + "resource": "issues" }, - "write_disposition": "append" - }, - "issues__labels": { - "name": "issues__labels", - "columns": { - "id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "id", - "data_type": "bigint", - "nullable": true - }, - "node_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "node_id", - "data_type": "text", - "nullable": true - }, - "url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "url", - "data_type": "text", - "nullable": true - }, - "name": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "name", - "data_type": "text", - "nullable": true - }, - "color": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "color", - "data_type": "text", - 
"nullable": true - }, - "default": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "default", - "data_type": "bool", - "nullable": true - }, - "description": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "description", - "data_type": "text", - "nullable": true - }, - "_dlt_parent_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": true, - "name": "_dlt_parent_id", - "data_type": "text", - "nullable": false - }, - "_dlt_list_idx": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "_dlt_list_idx", - "data_type": "bigint", - "nullable": false - }, - "_dlt_id": { - "partition": false, - "cluster": false, - "unique": true, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "_dlt_id", - "data_type": "text", - "nullable": false + "issues__labels": { + "columns": { + "id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "node_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "name": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "color": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "default": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bool", + "nullable": true + }, + "description": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "_dlt_parent_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false, + "parent_key": true + }, + "_dlt_list_idx": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": false + }, + "_dlt_id": { + "partition": false, + "cluster": false, + "unique": true, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false, + "row_key": true + } + }, + "parent": "issues", + "x-normalizer": { + "seen-data": true } }, - "parent": "issues" - }, - "issues__assignees": { - "name": "issues__assignees", - "columns": { - "login": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "login", - "data_type": "text", - "nullable": true - }, - "id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "id", - "data_type": "bigint", - "nullable": true - }, - "node_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": 
"node_id", - "data_type": "text", - "nullable": true - }, - "avatar_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "avatar_url", - "data_type": "text", - "nullable": true - }, - "gravatar_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "gravatar_id", - "data_type": "text", - "nullable": true - }, - "url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "url", - "data_type": "text", - "nullable": true - }, - "html_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "html_url", - "data_type": "text", - "nullable": true - }, - "followers_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "followers_url", - "data_type": "text", - "nullable": true - }, - "following_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "following_url", - "data_type": "text", - "nullable": true - }, - "gists_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "gists_url", - "data_type": "text", - "nullable": true - }, - "starred_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "starred_url", - "data_type": "text", - "nullable": true - }, - "subscriptions_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "subscriptions_url", - "data_type": "text", - "nullable": true - }, - "organizations_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "organizations_url", - "data_type": "text", - "nullable": true - }, - "repos_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "repos_url", - "data_type": "text", - "nullable": true - }, - "events_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "events_url", - "data_type": "text", - "nullable": true - }, - "received_events_url": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "received_events_url", - "data_type": "text", - "nullable": true - }, - "type": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "type", - "data_type": "text", - "nullable": true - }, - "site_admin": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "site_admin", - "data_type": "bool", - "nullable": true - }, - "_dlt_parent_id": { - "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": true, - "name": "_dlt_parent_id", - "data_type": "text", - "nullable": false - }, - "_dlt_list_idx": { 
- "partition": false, - "cluster": false, - "unique": false, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "_dlt_list_idx", - "data_type": "bigint", - "nullable": false - }, - "_dlt_id": { - "partition": false, - "cluster": false, - "unique": true, - "sort": false, - "primary_key": false, - "foreign_key": false, - "name": "_dlt_id", - "data_type": "text", - "nullable": false + "issues__assignees": { + "columns": { + "login": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": true + }, + "node_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "avatar_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "gravatar_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "html_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "followers_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "following_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "gists_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "starred_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "subscriptions_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "organizations_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "repos_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "events_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "received_events_url": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "type": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": true + }, + "site_admin": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bool", + "nullable": true + }, + "_dlt_parent_id": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + 
"primary_key": false, + "data_type": "text", + "nullable": false, + "parent_key": true + }, + "_dlt_list_idx": { + "partition": false, + "cluster": false, + "unique": false, + "sort": false, + "primary_key": false, + "data_type": "bigint", + "nullable": false + }, + "_dlt_id": { + "partition": false, + "cluster": false, + "unique": true, + "sort": false, + "primary_key": false, + "data_type": "text", + "nullable": false, + "row_key": true + } + }, + "parent": "issues", + "x-normalizer": { + "seen-data": true } - }, - "parent": "issues" - } - }, - "settings": { - "detections": [ - "timestamp", - "iso_timestamp", - "iso_date" - ], - "default_hints": { - "not_null": [ - "_dlt_id", - "_dlt_root_id", - "_dlt_parent_id", - "_dlt_list_idx", - "_dlt_load_id" - ], - "foreign_key": [ - "_dlt_parent_id" + } + }, + "settings": { + "detections": [ + "timestamp", + "iso_timestamp", + "iso_date" ], - "unique": [ - "_dlt_id" - ] - } - }, - "normalizers": { - "names": "dlt.common.normalizers.names.snake_case", - "json": { - "module": "dlt.common.normalizers.json.relational" - } + "default_hints": { + "not_null": [ + "_dlt_id", + "_dlt_root_id", + "_dlt_parent_id", + "_dlt_list_idx", + "_dlt_load_id" + ], + "unique": [ + "_dlt_id" + ], + "row_key": [ + "_dlt_id" + ], + "parent_key": [ + "_dlt_parent_id" + ] + }, + "schema_contract": {} + }, + "normalizers": { + "names": "dlt.common.normalizers.names.snake_case", + "json": { + "module": "dlt.common.normalizers.json.relational" + } + }, + "previous_hashes": [ + "IeCTkq8epwbjSy1O3jdkPPUkTPCt4hLj6RYo8uZ02JI=" + ] } -} diff --git a/tests/common/normalizers/test_json_relational.py b/tests/common/normalizers/test_json_relational.py index 1553cea04f..35bc80add2 100644 --- a/tests/common/normalizers/test_json_relational.py +++ b/tests/common/normalizers/test_json_relational.py @@ -6,14 +6,12 @@ from dlt.common.utils import digest128, uniq_id from dlt.common.schema import Schema from dlt.common.schema.utils import new_table - +from dlt.common.normalizers.utils import DLT_ID_LENGTH_BYTES from dlt.common.normalizers.json.relational import ( RelationalNormalizerConfigPropagation, DataItemNormalizer as RelationalNormalizer, - DLT_ID_LENGTH_BYTES, ) - -# _flatten, _get_child_row_hash, _normalize_row, normalize_data_item, +from dlt.common.normalizers.json import helpers as normalize_helpers from tests.utils import create_schema_with_name @@ -420,7 +418,7 @@ def test_list_in_list() -> None: schema.update_table(path_table) assert "zen__webpath" in schema.tables # clear cache with json paths - schema.data_item_normalizer._is_nested_type.cache_clear() # type: ignore[attr-defined] + normalize_helpers.is_nested_type.cache_clear() rows = list(schema.normalize_data_item(chats, "1762162.1212", "zen")) # both lists are json types now @@ -890,7 +888,7 @@ def test_caching_perf(norm: RelationalNormalizer) -> None: table["x-normalizer"] = {} start = time() for _ in range(100000): - norm._is_nested_type(norm.schema, "test", "field", 0) + normalize_helpers.is_nested_type(norm.schema, "test", "field", 0) # norm._get_table_nesting_level(norm.schema, "test") print(f"{time() - start}") diff --git a/tests/common/normalizers/test_naming_snake_case.py b/tests/common/normalizers/test_naming_snake_case.py index ee4f43e7f0..e03de65696 100644 --- a/tests/common/normalizers/test_naming_snake_case.py +++ b/tests/common/normalizers/test_naming_snake_case.py @@ -50,6 +50,14 @@ def test_normalize_path(naming_unlimited: NamingConvention) -> None: assert naming_unlimited.normalize_path("Small Love Potion") 
== "small_love_potion"
     assert naming_unlimited.normalize_path("Small Love Potion") == "small_love_potion"
 
+    # paths with non normalized underscores
+    # NOTE: empty idents created during break path are removed so underscores are contracted
+    assert (
+        naming_unlimited.normalize_path("Small___Love____Potion_____x")
+        == "small___love__potion___x"
+    )
+    assert naming_unlimited.normalize_path("small___love__potion___x") == "small___love__potion___x"
+
 
 
 def test_normalize_non_alpha_single_underscore() -> None:
     assert SnakeCaseNamingConvention.RE_NON_ALPHANUMERIC.sub("_", "-=!*") == "_"
diff --git a/tests/common/schema/test_import_normalizers.py b/tests/common/schema/test_import_normalizers.py
index a1e3d775f0..d444259946 100644
--- a/tests/common/schema/test_import_normalizers.py
+++ b/tests/common/schema/test_import_normalizers.py
@@ -16,7 +16,7 @@
 )
 from dlt.common.schema.normalizers import (
     DEFAULT_NAMING_NAMESPACE,
-    explicit_normalizers,
+    configured_normalizers,
     import_normalizers,
     naming_from_reference,
     serialize_reference,
@@ -26,25 +26,25 @@ def test_explicit_normalizers() -> None:
-    config = explicit_normalizers()
+    config = configured_normalizers()
     assert config["names"] is None
     assert config["json"] is None
 
     # pass explicit
-    config = explicit_normalizers("direct", {"module": "custom"})
+    config = configured_normalizers("direct", {"module": "custom"})
     assert config["names"] == "direct"
     assert config["json"] == {"module": "custom"}
 
     # pass modules and types, make sure normalizer config is serialized
-    config = explicit_normalizers(direct)
+    config = configured_normalizers(direct)
     assert config["names"] == f"{DEFAULT_NAMING_NAMESPACE}.direct.NamingConvention"
 
-    config = explicit_normalizers(direct.NamingConvention)
+    config = configured_normalizers(direct.NamingConvention)
     assert config["names"] == f"{DEFAULT_NAMING_NAMESPACE}.direct.NamingConvention"
 
     # use environ
     os.environ["SCHEMA__NAMING"] = "direct"
     os.environ["SCHEMA__JSON_NORMALIZER"] = '{"module": "custom"}'
-    config = explicit_normalizers()
+    config = configured_normalizers()
     assert config["names"] == "direct"
     assert config["json"] == {"module": "custom"}
@@ -54,7 +54,7 @@ def test_explicit_normalizers_caps_ignored() -> None:
     destination_caps = DestinationCapabilitiesContext.generic_capabilities()
     destination_caps.naming_convention = "direct"
     with Container().injectable_context(destination_caps):
-        config = explicit_normalizers()
+        config = configured_normalizers()
         assert config["names"] is None
@@ -121,7 +121,7 @@ def test_naming_from_reference() -> None:
 
 
 def test_import_normalizers() -> None:
-    config, naming, json_normalizer = import_normalizers(explicit_normalizers())
+    config, naming, json_normalizer = import_normalizers(configured_normalizers())
     assert isinstance(naming, snake_case.NamingConvention)
     # no maximum length: we do not know the destination capabilities
     assert naming.max_length is None
@@ -133,7 +133,7 @@ def test_import_normalizers() -> None:
     os.environ["SCHEMA__JSON_NORMALIZER"] = (
         '{"module": "tests.common.normalizers.custom_normalizers"}'
     )
-    config, naming, json_normalizer = import_normalizers(explicit_normalizers())
+    config, naming, json_normalizer = import_normalizers(configured_normalizers())
     assert config["names"] == "direct"
     assert config["json"] == {"module": "tests.common.normalizers.custom_normalizers"}
     assert isinstance(naming, direct.NamingConvention)
@@ -142,7 +142,7 @@ def test_import_normalizers_with_defaults() -> None:
-    explicit = explicit_normalizers()
+    explicit = configured_normalizers()
     default_: TNormalizersConfig = {
         "names": "dlt.destinations.impl.weaviate.naming",
         "json": {"module": "tests.common.normalizers.custom_normalizers"},
@@ -170,7 +170,7 @@ def test_config_sections(sections: str) -> None:
     os.environ[f"{sections}SCHEMA__JSON_NORMALIZER"] = (
         '{"module": "tests.common.normalizers.custom_normalizers"}'
     )
-    config, _, _ = import_normalizers(explicit_normalizers(schema_name="test_schema"))
+    config, _, _ = import_normalizers(configured_normalizers(schema_name="test_schema"))
     assert config["names"] == "direct"
     assert config["json"] == {"module": "tests.common.normalizers.custom_normalizers"}
@@ -181,11 +181,11 @@ def test_import_normalizers_with_caps() -> None:
     destination_caps.naming_convention = "direct"
     destination_caps.max_identifier_length = 127
     with Container().injectable_context(destination_caps):
-        _, naming, _ = import_normalizers(explicit_normalizers())
+        _, naming, _ = import_normalizers(configured_normalizers())
         assert isinstance(naming, direct.NamingConvention)
         assert naming.max_length == 127
 
-        _, naming, _ = import_normalizers(explicit_normalizers(snake_case))
+        _, naming, _ = import_normalizers(configured_normalizers(snake_case))
         assert isinstance(naming, snake_case.NamingConvention)
         assert naming.max_length == 127
@@ -196,23 +196,23 @@ def test_import_normalizers_with_caps() -> None:
     }
     destination_caps.max_table_nesting = 0
     with Container().injectable_context(destination_caps):
-        config, _, relational = import_normalizers(explicit_normalizers())
+        config, _, relational = import_normalizers(configured_normalizers())
         assert config["json"]["config"]["max_nesting"] == 0
         assert relational is RelationalNormalizer
         # wrong normalizer
-        config, _, relational = import_normalizers(explicit_normalizers(), default_)
+        config, _, relational = import_normalizers(configured_normalizers(), default_)
         assert "config" not in config["json"]
 
 
 def test_import_invalid_naming_module() -> None:
     with pytest.raises(UnknownNamingModule) as py_ex:
-        import_normalizers(explicit_normalizers("unknown"))
+        import_normalizers(configured_normalizers("unknown"))
     assert py_ex.value.naming_module == "unknown"
     with pytest.raises(UnknownNamingModule) as py_ex:
-        import_normalizers(explicit_normalizers("dlt.common.tests"))
+        import_normalizers(configured_normalizers("dlt.common.tests"))
     assert py_ex.value.naming_module == "dlt.common.tests"
     with pytest.raises(InvalidNamingType) as py_ex2:
-        import_normalizers(explicit_normalizers("dlt.pipeline.helpers"))
+        import_normalizers(configured_normalizers("dlt.pipeline.helpers"))
     assert py_ex2.value.naming_module == "dlt.pipeline"
     assert py_ex2.value.naming_class == "helpers"
diff --git a/tests/common/schema/test_normalize_identifiers.py b/tests/common/schema/test_normalize_identifiers.py
index f84d857e26..a1cb181525 100644
--- a/tests/common/schema/test_normalize_identifiers.py
+++ b/tests/common/schema/test_normalize_identifiers.py
@@ -271,12 +271,7 @@ def test_normalize_table_identifiers_table_reference() -> None:
 def test_update_normalizers() -> None:
-    schema_dict: TStoredSchema = load_json_case("schemas/github/issues.schema")
-    schema = Schema.from_dict(schema_dict)  # type: ignore[arg-type]
-    # drop seen data
-    del schema.tables["issues"]["x-normalizer"]
-    del schema.tables["issues__labels"]["x-normalizer"]
-    del schema.tables["issues__assignees"]["x-normalizer"]
+    schema = make_issues_schema_for_normalizers_update()
     # save default hints in original form
     default_hints = schema._settings["default_hints"]
@@ -307,8 +302,8 @@ def test_normalize_default_hints(schema_storage_no_import: SchemaStorage) -> Non
     from dlt.common.destination import DestinationCapabilitiesContext
     from dlt.common.configuration.container import Container
 
-    eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9")
-    orig_schema = Schema.from_dict(eth_V9)
+    eth_V11 = load_yml_case("schemas/eth/ethereum_schema_v11")
+    orig_schema = Schema.from_dict(eth_V11)
     # save schema
     schema_storage_no_import.save_schema(orig_schema)
@@ -317,7 +312,7 @@ def test_normalize_default_hints(schema_storage_no_import: SchemaStorage) -> Non
     ) as caps:
         assert caps.naming_convention is sql_upper
         # creating a schema from dict keeps original normalizers
-        schema = Schema.from_dict(eth_V9)
+        schema = Schema.from_dict(eth_V11)
         assert_schema_identifiers_case(schema, str.lower)
         assert schema._normalizers_config["names"].endswith("snake_case")
@@ -350,7 +345,7 @@ def test_normalize_default_hints(schema_storage_no_import: SchemaStorage) -> Non
     )
     norm_schema = Schema.from_dict(
-        deepcopy(eth_V9), remove_processing_hints=True, bump_version=False
+        deepcopy(eth_V11), remove_processing_hints=True, bump_version=False
     )
     norm_schema.update_normalizers()
     assert_schema_identifiers_case(norm_schema, str.upper)
@@ -452,3 +447,50 @@ def assert_new_schema_values_custom_normalizers(schema: Schema) -> None:
     assert schema.naming.break_path("A__B__!C") == ["A", "B", "!C"]
     row = list(schema.normalize_data_item({"bool": True}, "load_id", "a_table"))
     assert row[0] == (("a_table", None), {"bool": True})
+
+
+def test_update_schema_normalizer_props() -> None:
+    schema = make_issues_schema_for_normalizers_update()
+    schema_2 = make_issues_schema_for_normalizers_update()
+    # remove issues table
+    del schema_2._schema_tables["issues"]
+    schema_2.update_schema(schema)
+
+    os.environ["SCHEMA__NAMING"] = "tests.common.cases.normalizers.sql_upper"
+    # apply normalizers
+    schema_2.update_normalizers()
+
+    # preserve schema_2 str
+    schema_2_str = schema_2.to_pretty_json()
+
+    # make sure that normalizer props in original schema are preserved
+    schema._normalizers_config["allow_identifier_change_on_table_with_data"] = True
+    schema._normalizers_config["use_break_path_on_normalize"] = True
+
+    # set some fake naming convention. during schema update it should not be used
+    os.environ["SCHEMA__NAMING"] = "tests.common.cases.normalizers.sql_upper_X"
+    schema.update_schema(schema_2)
+    assert isinstance(schema.naming, sql_upper.NamingConvention)
+    assert_schema_identifiers_case(schema, str.upper)
+    # make sure norm setting still in schema
+    assert schema._normalizers_config["allow_identifier_change_on_table_with_data"] is True
+    assert schema._normalizers_config["use_break_path_on_normalize"] is True
+    # schema 2 not modified during the update
+    assert schema_2_str == schema_2.to_pretty_json()
+
+    # make sure that explicit settings are passed
+    schema_2._normalizers_config["allow_identifier_change_on_table_with_data"] = False
+    schema_2._normalizers_config["use_break_path_on_normalize"] = False
+    schema.update_schema(schema_2)
+    assert schema._normalizers_config["allow_identifier_change_on_table_with_data"] is False
+    assert schema._normalizers_config["use_break_path_on_normalize"] is False
+
+
+def make_issues_schema_for_normalizers_update() -> Schema:
+    schema_dict: TStoredSchema = load_json_case("schemas/github/issues.schema")
+    schema = Schema.from_dict(schema_dict)  # type: ignore[arg-type]
+    # drop seen data
+    del schema.tables["issues"]["x-normalizer"]
+    del schema.tables["issues__labels"]["x-normalizer"]
+    del schema.tables["issues__assignees"]["x-normalizer"]
+    return schema
diff --git a/tests/common/schema/test_schema.py b/tests/common/schema/test_schema.py
index 7124ca5c80..5cdd42e448 100644
--- a/tests/common/schema/test_schema.py
+++ b/tests/common/schema/test_schema.py
@@ -570,8 +570,8 @@ def test_update_preferred_types(schema: Schema) -> None:
 
 
 def test_default_table_resource() -> None:
     """Parent tables without `resource` set default to table name"""
-    eth_v5 = load_yml_case("schemas/eth/ethereum_schema_v5")
-    tables = Schema.from_dict(eth_v5).tables
+    eth_v11 = load_yml_case("schemas/eth/ethereum_schema_v11")
+    tables = Schema.from_dict(eth_v11).tables
     assert tables["blocks"]["resource"] == "blocks"
     assert all([t.get("resource") is None for t in tables.values() if t.get("parent")])
@@ -737,7 +737,7 @@ def assert_new_schema_props_custom_normalizers(schema: Schema) -> None:
 def assert_is_new_schema(schema: Schema) -> None:
     assert schema.stored_version is None
     assert schema.stored_version_hash is None
-    assert schema.ENGINE_VERSION == 10
+    assert schema.ENGINE_VERSION == 11
     assert schema._stored_previous_hashes == []
     assert schema.is_modified
     assert schema.is_new
@@ -845,9 +845,9 @@ def test_group_tables_by_resource(schema: Schema) -> None:
 
 
 def test_remove_processing_hints() -> None:
-    eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9")
+    eth_V11 = load_yml_case("schemas/eth/ethereum_schema_v11")
     # here tables contain processing hints
-    schema = Schema.from_dict(eth_V9)
+    schema = Schema.from_dict(eth_V11)
     assert "x-normalizer" in schema.tables["blocks"]
 
     # clone with hints removal, note that clone does not bump version
@@ -867,16 +867,10 @@ def test_remove_processing_hints() -> None:
     assert "x-normalizer" not in to_json
 
     # load without hints
-    no_hints = schema.from_dict(eth_V9, remove_processing_hints=True, bump_version=False)
+    no_hints = schema.from_dict(eth_V11, remove_processing_hints=True, bump_version=False)
     assert no_hints.stored_version_hash == cloned.stored_version_hash
 
     # now load without hints but with version bump
     cloned._bump_version()
-    no_hints = schema.from_dict(eth_V9, remove_processing_hints=True)
+    no_hints = schema.from_dict(eth_V11, remove_processing_hints=True)
     assert no_hints.stored_version_hash == cloned.stored_version_hash
-
-
-# def test_get_new_table_columns() -> None:
-#     pytest.fail(reason="must implement!")
-#     pass
-#     get_new_table_columns()
diff --git a/tests/common/schema/test_versioning.py b/tests/common/schema/test_versioning.py
index 39f1ad3211..1577b51115 100644
--- a/tests/common/schema/test_versioning.py
+++ b/tests/common/schema/test_versioning.py
@@ -86,10 +86,10 @@ def test_infer_column_bumps_version() -> None:
 
 
 def test_preserve_version_on_load() -> None:
-    eth_v10: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v10")
-    version = eth_v10["version"]
-    version_hash = eth_v10["version_hash"]
-    schema = Schema.from_dict(eth_v10)  # type: ignore[arg-type]
+    eth_v11: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v11")
+    version = eth_v11["version"]
+    version_hash = eth_v11["version_hash"]
+    schema = Schema.from_dict(eth_v11)  # type: ignore[arg-type]
     # version should not be bumped
     assert version_hash == schema._stored_version_hash
     assert version_hash == schema.version_hash
@@ -98,8 +98,8 @@ def test_preserve_version_on_load() -> None:
 
 @pytest.mark.parametrize("remove_defaults", [True, False])
 def test_version_preserve_on_reload(remove_defaults: bool) -> None:
-    eth_v8: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v8")
-    schema = Schema.from_dict(eth_v8)  # type: ignore[arg-type]
+    eth_v11: TStoredSchema = load_yml_case("schemas/eth/ethereum_schema_v11")
+    schema = Schema.from_dict(eth_v11)  # type: ignore[arg-type]
     to_save_dict = schema.to_dict(remove_defaults=remove_defaults)
     assert schema.stored_version == to_save_dict["version"]
diff --git a/tests/common/storages/test_schema_storage.py b/tests/common/storages/test_schema_storage.py
index 0dcf2930de..2818ea9622 100644
--- a/tests/common/storages/test_schema_storage.py
+++ b/tests/common/storages/test_schema_storage.py
@@ -3,7 +3,7 @@
 import yaml
 
 from dlt.common import json
-from dlt.common.schema.normalizers import explicit_normalizers
+from dlt.common.schema.normalizers import configured_normalizers
 from dlt.common.schema.schema import Schema
 from dlt.common.storages.exceptions import (
     InStorageSchemaModified,
@@ -304,7 +304,7 @@ def test_save_store_schema_over_import_sync(synced_storage: SchemaStorage) -> No
 
 
 def test_save_store_schema(storage: SchemaStorage) -> None:
-    d_n = explicit_normalizers()
+    d_n = configured_normalizers()
     d_n["names"] = "tests.common.normalizers.custom_normalizers"
     schema = Schema("column_event", normalizers=d_n)
     assert schema.is_new
@@ -357,16 +357,16 @@ def test_save_initial_import_schema(ie_storage: LiveSchemaStorage) -> None:
     ie_storage.load_schema("ethereum")
 
     # save initial import schema where processing hints are removed
-    eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9")
-    schema = Schema.from_dict(eth_V9)
+    eth_V11 = load_yml_case("schemas/eth/ethereum_schema_v11")
+    schema = Schema.from_dict(eth_V11)
     ie_storage.save_import_schema_if_not_exists(schema)
     # should be available now
     eth = ie_storage.load_schema("ethereum")
     assert "x-normalizer" not in eth.tables["blocks"]
 
     # won't overwrite initial schema
-    del eth_V9["tables"]["blocks__uncles"]
-    schema = Schema.from_dict(eth_V9)
+    del eth_V11["tables"]["blocks__uncles"]
+    schema = Schema.from_dict(eth_V11)
     ie_storage.save_import_schema_if_not_exists(schema)
     # should be available now
     eth = ie_storage.load_schema("ethereum")
diff --git a/tests/common/storages/utils.py b/tests/common/storages/utils.py
index a1334ba1da..5366d8b06f 100644
---
a/tests/common/storages/utils.py +++ b/tests/common/storages/utils.py @@ -218,9 +218,9 @@ def assert_package_info( def prepare_eth_import_folder(storage: SchemaStorage) -> Schema: - eth_V9 = load_yml_case("schemas/eth/ethereum_schema_v9") + eth_V11 = load_yml_case("schemas/eth/ethereum_schema_v11") # remove processing hints before installing as import schema # ethereum schema is a "dirty" schema with processing hints - eth = Schema.from_dict(eth_V9, remove_processing_hints=True) + eth = Schema.from_dict(eth_V11, remove_processing_hints=True) storage._export_schema(eth, storage.config.import_schema_path) return eth diff --git a/tests/common/test_utils.py b/tests/common/test_utils.py index e3098a1a77..9eeded1229 100644 --- a/tests/common/test_utils.py +++ b/tests/common/test_utils.py @@ -26,6 +26,7 @@ get_exception_trace, get_exception_trace_chain, update_dict_nested, + removeprefix, ) @@ -440,3 +441,11 @@ def _function_test(a, *, b=None): except Exception as exc: assert str(exc) == "wrong type" assert is_typeerror_due_to_wrong_call(exc, function_typeerror_exc) is False + + +def test_removeprefix() -> None: + assert removeprefix("a_data", "a_") == "data" + assert removeprefix("a_data", "a_data") == "" + assert removeprefix("a_data", "a_data_1") == "a_data" + assert removeprefix("", "a_data_1") == "" + assert removeprefix("a_data", "") == "a_data" diff --git a/tests/common/test_validation.py b/tests/common/test_validation.py index f3ebb02b46..6899d8d5fe 100644 --- a/tests/common/test_validation.py +++ b/tests/common/test_validation.py @@ -111,7 +111,7 @@ def test_doc() -> TTestRecord: def test_validate_schema_cases() -> None: with open( - "tests/common/cases/schemas/eth/ethereum_schema_v10.yml", mode="r", encoding="utf-8" + "tests/common/cases/schemas/eth/ethereum_schema_v11.yml", mode="r", encoding="utf-8" ) as f: schema_dict: TStoredSchema = yaml.safe_load(f) diff --git a/tests/common/utils.py b/tests/common/utils.py index 9b5e6bccce..a0760ffe86 100644 --- a/tests/common/utils.py +++ b/tests/common/utils.py @@ -19,11 +19,11 @@ def IMPORTED_VERSION_HASH_ETH_V10() -> str: # for import schema tests, change when upgrading the schema version - eth_V10 = load_yml_case("schemas/eth/ethereum_schema_v10") - assert eth_V10["version_hash"] == "veEmgbCPXCIiqyfabeQWwz6UIQ2liETv7LLMpyktCos=" + eth_V11 = load_yml_case("schemas/eth/ethereum_schema_v11") + assert eth_V11["version_hash"] == "XfkJ8E1tZzG/Sb3lfEZrEVshTMKdB7JpOP2HA7eS6EI=" # remove processing hints before installing as import schema # ethereum schema is a "dirty" schema with processing hints - eth = Schema.from_dict(eth_V10, remove_processing_hints=True) + eth = Schema.from_dict(eth_V11, remove_processing_hints=True) return eth.stored_version_hash diff --git a/tests/extract/cases/eth_source/ethereum.schema.yaml b/tests/extract/cases/eth_source/ethereum.schema.yaml index d224088f8b..e20260bfe7 100644 --- a/tests/extract/cases/eth_source/ethereum.schema.yaml +++ b/tests/extract/cases/eth_source/ethereum.schema.yaml @@ -1,6 +1,6 @@ version: 18 -version_hash: veEmgbCPXCIiqyfabeQWwz6UIQ2liETv7LLMpyktCos= -engine_version: 10 +version_hash: XfkJ8E1tZzG/Sb3lfEZrEVshTMKdB7JpOP2HA7eS6EI= +engine_version: 11 name: ethereum tables: _dlt_loads: diff --git a/tests/extract/test_decorators.py b/tests/extract/test_decorators.py index 5dc4304a63..a14b4a9602 100644 --- a/tests/extract/test_decorators.py +++ b/tests/extract/test_decorators.py @@ -112,9 +112,9 @@ def test_load_schema_for_callable() -> None: schema = s.schema assert schema.name == "ethereum" == 
s.name # the schema in the associated file has this hash - eth_v9 = load_yml_case("schemas/eth/ethereum_schema_v9") + eth_v11 = load_yml_case("schemas/eth/ethereum_schema_v11") # source removes processing hints so we do - reference_schema = Schema.from_dict(eth_v9, remove_processing_hints=True) + reference_schema = Schema.from_dict(eth_v11, remove_processing_hints=True) assert schema.stored_version_hash == reference_schema.stored_version_hash diff --git a/tests/extract/test_incremental.py b/tests/extract/test_incremental.py index 30df12ae17..725872b621 100644 --- a/tests/extract/test_incremental.py +++ b/tests/extract/test_incremental.py @@ -219,8 +219,74 @@ def some_data(created_at=dlt.sources.incremental("created_at")): assert rows == [(1, "a"), (2, "b"), (3, "c"), (3, "d"), (3, "e"), (3, "f"), (4, "g")] +def test_pandas_index_as_dedup_key() -> None: + from dlt.common.libs.pandas import pandas_to_arrow, pandas as pd + + some_data, p = _make_dedup_pipeline("pandas") + + # no index + no_index_r = some_data.with_name(new_name="no_index") + p.run(no_index_r) + p.run(no_index_r) + data_ = p._dataset().no_index.arrow() + assert data_.schema.names == ["created_at", "id"] + assert data_["id"].to_pylist() == ["a", "b", "c", "d", "e", "f", "g"] + + # unnamed index: explicitly converted + unnamed_index_r = some_data.with_name(new_name="unnamed_index").add_map( + lambda df: pandas_to_arrow(df, preserve_index=True) + ) + # use it (as in arrow table) to deduplicate + unnamed_index_r.incremental.primary_key = "__index_level_0__" + p.run(unnamed_index_r) + p.run(unnamed_index_r) + data_ = p._dataset().unnamed_index.arrow() + assert data_.schema.names == ["created_at", "id", "index_level_0"] + # indexes 2 and 3 are removed from second batch because they were in the previous batch + # and the created_at overlapped so they got deduplicated + assert data_["index_level_0"].to_pylist() == [0, 1, 2, 3, 4, 0, 1, 4] + + def _make_named_index(df_: pd.DataFrame) -> pd.DataFrame: + df_.index = pd.RangeIndex(start=0, stop=len(df_), step=1, name="order_id") + return df_ + + # named index explicitly converted + named_index_r = some_data.with_name(new_name="named_index").add_map( + lambda df: pandas_to_arrow(_make_named_index(df), preserve_index=True) + ) + # use it (as in arrow table) to deduplicate + named_index_r.incremental.primary_key = "order_id" + p.run(named_index_r) + p.run(named_index_r) + data_ = p._dataset().named_index.arrow() + assert data_.schema.names == ["created_at", "id", "order_id"] + assert data_["order_id"].to_pylist() == [0, 1, 2, 3, 4, 0, 1, 4] + + # named index explicitly converted + named_index_impl_r = some_data.with_name(new_name="named_index_impl").add_map( + lambda df: _make_named_index(df) + ) + p.run(named_index_impl_r) + p.run(named_index_impl_r) + data_ = p._dataset().named_index_impl.arrow() + assert data_.schema.names == ["created_at", "id"] + assert data_["id"].to_pylist() == ["a", "b", "c", "d", "e", "f", "g"] + + @pytest.mark.parametrize("item_type", ALL_TEST_DATA_ITEM_FORMATS) def test_unique_rows_by_hash_are_deduplicated(item_type: TestDataItemFormat) -> None: + some_data, p = _make_dedup_pipeline(item_type) + p.run(some_data()) + p.run(some_data()) + + with p.sql_client() as c: + with c.execute_query("SELECT created_at, id FROM some_data ORDER BY created_at, id") as cur: + rows = cur.fetchall() + print(rows) + assert rows == [(1, "a"), (2, "b"), (3, "c"), (3, "d"), (3, "e"), (3, "f"), (4, "g")] + + +def _make_dedup_pipeline(item_type: TestDataItemFormat): data1 = [ 
{"created_at": 1, "id": "a"}, {"created_at": 2, "id": "b"}, @@ -235,7 +301,6 @@ def test_unique_rows_by_hash_are_deduplicated(item_type: TestDataItemFormat) -> {"created_at": 3, "id": "f"}, {"created_at": 4, "id": "g"}, ] - source_items1 = data_to_item_format(item_type, data1) source_items2 = data_to_item_format(item_type, data2) @@ -250,14 +315,7 @@ def some_data(created_at=dlt.sources.incremental("created_at")): pipeline_name=uniq_id(), destination=dlt.destinations.duckdb(credentials=duckdb.connect(":memory:")), ) - p.run(some_data()) - p.run(some_data()) - - with p.sql_client() as c: - with c.execute_query("SELECT created_at, id FROM some_data order by created_at, id") as cur: - rows = cur.fetchall() - - assert rows == [(1, "a"), (2, "b"), (3, "c"), (3, "d"), (3, "e"), (3, "f"), (4, "g")] + return some_data, p def test_nested_cursor_path() -> None: diff --git a/tests/libs/pyarrow/test_pyarrow_normalizer.py b/tests/libs/pyarrow/test_pyarrow_normalizer.py index 32ee5fdafc..c81d8cd974 100644 --- a/tests/libs/pyarrow/test_pyarrow_normalizer.py +++ b/tests/libs/pyarrow/test_pyarrow_normalizer.py @@ -5,12 +5,12 @@ from dlt.common.libs.pyarrow import normalize_py_arrow_item, NameNormalizationCollision from dlt.common.schema.utils import new_column, TColumnSchema -from dlt.common.schema.normalizers import explicit_normalizers, import_normalizers +from dlt.common.schema.normalizers import configured_normalizers, import_normalizers from dlt.common.destination import DestinationCapabilitiesContext def _normalize(table: pa.Table, columns: List[TColumnSchema]) -> pa.Table: - _, naming, _ = import_normalizers(explicit_normalizers()) + _, naming, _ = import_normalizers(configured_normalizers()) caps = DestinationCapabilitiesContext() columns_schema = {c["name"]: c for c in columns} return normalize_py_arrow_item(table, columns_schema, naming, caps) diff --git a/tests/load/clickhouse/test_clickhouse_configuration.py b/tests/load/clickhouse/test_clickhouse_configuration.py index ad33062f11..eabc3094bd 100644 --- a/tests/load/clickhouse/test_clickhouse_configuration.py +++ b/tests/load/clickhouse/test_clickhouse_configuration.py @@ -56,7 +56,8 @@ def test_clickhouse_configuration() -> None: def test_clickhouse_connection_settings(client: ClickHouseClient) -> None: """Test experimental settings are set correctly for the session.""" - conn = client.sql_client.open_connection() + # with client.sql_client.open_connection() as conn: + conn = client.sql_client.native_connection cursor1 = conn.cursor() cursor2 = conn.cursor() @@ -69,3 +70,26 @@ def test_clickhouse_connection_settings(client: ClickHouseClient) -> None: assert ("allow_experimental_lightweight_delete", "1") in res assert ("enable_http_compression", "1") in res assert ("date_time_input_format", "best_effort") in res + + +def test_client_has_dataset(client: ClickHouseClient) -> None: + # with client.sql_client as sql_client: + assert client.sql_client.has_dataset() + separator = client.config.dataset_table_separator + + def _assert_has_dataset() -> None: + assert not client.sql_client.has_dataset() + client.sql_client.create_dataset() + assert client.sql_client.has_dataset() + client.sql_client.drop_dataset() + assert not client.sql_client.has_dataset() + + try: + # change separator + client.config.dataset_table_separator = "_" + _assert_has_dataset() + + client.config.dataset_table_separator = "" + _assert_has_dataset() + finally: + client.config.dataset_table_separator = separator diff --git a/tests/load/conftest.py b/tests/load/conftest.py index 
76a7248e5b..c52fea607d 100644 --- a/tests/load/conftest.py +++ b/tests/load/conftest.py @@ -9,7 +9,7 @@ drop_pipeline, empty_schema, ) -from tests.utils import preserve_environ, patch_home_dir +from tests.utils import preserve_environ, patch_home_dir, autouse_test_storage @pytest.fixture(scope="function", params=DEFAULT_BUCKETS) diff --git a/tests/load/duckdb/test_duckdb_client.py b/tests/load/duckdb/test_duckdb_client.py index a9479a0bb9..49475ce43f 100644 --- a/tests/load/duckdb/test_duckdb_client.py +++ b/tests/load/duckdb/test_duckdb_client.py @@ -19,7 +19,7 @@ from dlt.pipeline.exceptions import PipelineStepFailed from tests.pipeline.utils import assert_table -from tests.utils import patch_home_dir, autouse_test_storage, TEST_STORAGE_ROOT +from tests.utils import autouse_test_storage, TEST_STORAGE_ROOT # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/filesystem/test_aws_credentials.py b/tests/load/filesystem/test_aws_credentials.py index b782e76b7e..1113b9b35d 100644 --- a/tests/load/filesystem/test_aws_credentials.py +++ b/tests/load/filesystem/test_aws_credentials.py @@ -9,7 +9,6 @@ from tests.common.configuration.utils import environment from tests.load.utils import ALL_FILESYSTEM_DRIVERS -from tests.utils import autouse_test_storage # mark all tests as essential, do not remove pytestmark = pytest.mark.essential diff --git a/tests/load/filesystem/test_filesystem_common.py b/tests/load/filesystem/test_filesystem_common.py index d0a29d03d0..afcd9105a8 100644 --- a/tests/load/filesystem/test_filesystem_common.py +++ b/tests/load/filesystem/test_filesystem_common.py @@ -28,7 +28,6 @@ from tests.common.configuration.utils import environment from tests.common.storages.utils import TEST_SAMPLE_FILES, assert_sample_files from tests.load.utils import ALL_FILESYSTEM_DRIVERS, AWS_BUCKET, WITH_GDRIVE_BUCKETS -from tests.utils import autouse_test_storage from tests.load.filesystem.utils import self_signed_cert diff --git a/tests/load/pipeline/conftest.py b/tests/load/pipeline/conftest.py index a2ba65494b..80c418ed22 100644 --- a/tests/load/pipeline/conftest.py +++ b/tests/load/pipeline/conftest.py @@ -1,2 +1,2 @@ -from tests.utils import autouse_test_storage, duckdb_pipeline_location +from tests.utils import duckdb_pipeline_location from tests.pipeline.utils import drop_dataset_from_env diff --git a/tests/load/pipeline/test_merge_disposition.py b/tests/load/pipeline/test_merge_disposition.py index 2925bfac6f..8b6fc751d9 100644 --- a/tests/load/pipeline/test_merge_disposition.py +++ b/tests/load/pipeline/test_merge_disposition.py @@ -80,7 +80,7 @@ def test_merge_on_keys_in_schema( skip_if_not_supported(merge_strategy, p.destination) - with open("tests/common/cases/schemas/eth/ethereum_schema_v9.yml", "r", encoding="utf-8") as f: + with open("tests/common/cases/schemas/eth/ethereum_schema_v11.yml", "r", encoding="utf-8") as f: schema = dlt.Schema.from_dict(yaml.safe_load(f)) # make block uncles unseen to trigger filtering loader in loader for nested tables diff --git a/tests/load/pipeline/test_scd2.py b/tests/load/pipeline/test_scd2.py index 2a5b9ed296..962c501619 100644 --- a/tests/load/pipeline/test_scd2.py +++ b/tests/load/pipeline/test_scd2.py @@ -11,7 +11,7 @@ from dlt.common.pipeline import LoadInfo from dlt.common.data_types.typing import TDataType from dlt.common.schema.typing import DEFAULT_VALIDITY_COLUMN_NAMES -from dlt.common.normalizers.json.relational import DataItemNormalizer +from dlt.common.normalizers.json.helpers import 
get_row_hash from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeCaseNamingConvention from dlt.common.time import ensure_pendulum_datetime, reduce_pendulum_datetime_precision from dlt.extract.resource import DltResource @@ -30,7 +30,6 @@ from tests.utils import TPythonTableFormat -get_row_hash = DataItemNormalizer.get_row_hash FROM, TO = DEFAULT_VALIDITY_COLUMN_NAMES diff --git a/tests/load/qdrant/utils.py b/tests/load/qdrant/utils.py index e96e06be87..8a3b37dd48 100644 --- a/tests/load/qdrant/utils.py +++ b/tests/load/qdrant/utils.py @@ -61,6 +61,5 @@ def has_collections(client): if has_collections(client): client.drop_storage() - p._wipe_working_folder() # deactivate context Container()[PipelineContext].deactivate() diff --git a/tests/load/redshift/test_redshift_client.py b/tests/load/redshift/test_redshift_client.py index b60c6a8956..ef0acb33a4 100644 --- a/tests/load/redshift/test_redshift_client.py +++ b/tests/load/redshift/test_redshift_client.py @@ -21,7 +21,7 @@ from dlt.destinations.impl.redshift.redshift import RedshiftClient, psycopg2 from tests.common.utils import COMMON_TEST_CASES_PATH -from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage, skipifpypy +from tests.utils import TEST_STORAGE_ROOT, skipifpypy from tests.load.utils import expect_load_file, prepare_table, yield_client_with_storage # mark all tests as essential, do not remove diff --git a/tests/load/test_job_client.py b/tests/load/test_job_client.py index 9f64722a1e..6f699436b3 100644 --- a/tests/load/test_job_client.py +++ b/tests/load/test_job_client.py @@ -36,7 +36,7 @@ from dlt.common.time import ensure_pendulum_datetime from tests.cases import table_update_and_row, assert_all_data_types_row -from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage +from tests.utils import TEST_STORAGE_ROOT from tests.common.utils import load_json_case from tests.load.utils import ( TABLE_UPDATE, diff --git a/tests/load/test_read_interfaces.py b/tests/load/test_read_interfaces.py index f5a8d51baf..1a9c8a383b 100644 --- a/tests/load/test_read_interfaces.py +++ b/tests/load/test_read_interfaces.py @@ -10,6 +10,7 @@ from typing import List from functools import reduce +from dlt.common.storages.file_storage import FileStorage from tests.load.utils import ( destinations_configs, DestinationTestConfiguration, @@ -18,7 +19,7 @@ MEMORY_BUCKET, ) from dlt.destinations import filesystem -from tests.utils import TEST_STORAGE_ROOT +from tests.utils import TEST_STORAGE_ROOT, clean_test_storage from dlt.common.destination.reference import TDestinationReferenceArg from dlt.destinations.dataset import ReadableDBAPIDataset, ReadableRelationUnknownColumnException from tests.load.utils import drop_pipeline_data @@ -48,8 +49,14 @@ def _expected_chunk_count(p: Pipeline) -> List[int]: return [_chunk_size(p), _total_records(p) - _chunk_size(p)] +# this also disables autouse_test_storage on function level which destroys some tests here @pytest.fixture(scope="session") -def populated_pipeline(request) -> Any: +def autouse_test_storage() -> FileStorage: + return clean_test_storage() + + +@pytest.fixture(scope="session") +def populated_pipeline(request, autouse_test_storage) -> Any: """fixture that returns a pipeline object populated with the example data""" destination_config = cast(DestinationTestConfiguration, request.param) diff --git a/tests/load/test_sql_client.py b/tests/load/test_sql_client.py index 05c10a900f..ee48222da9 100644 --- a/tests/load/test_sql_client.py +++ b/tests/load/test_sql_client.py @@ 
-22,7 +22,7 @@ from dlt.destinations.typing import TNativeConn from dlt.common.time import ensure_pendulum_datetime, to_py_datetime -from tests.utils import TEST_STORAGE_ROOT, autouse_test_storage +from tests.utils import TEST_STORAGE_ROOT from tests.load.utils import ( yield_client_with_storage, prepare_table, diff --git a/tests/load/weaviate/utils.py b/tests/load/weaviate/utils.py index b391c2fa38..b98b55fcfa 100644 --- a/tests/load/weaviate/utils.py +++ b/tests/load/weaviate/utils.py @@ -95,6 +95,5 @@ def schema_has_classes(client): if schema_has_classes(client): client.drop_storage() - p._wipe_working_folder() # deactivate context Container()[PipelineContext].deactivate() diff --git a/tests/pipeline/cases/github_pipeline/github_rev.py b/tests/pipeline/cases/github_pipeline/github_rev.py new file mode 100644 index 0000000000..4ebe3048f4 --- /dev/null +++ b/tests/pipeline/cases/github_pipeline/github_rev.py @@ -0,0 +1,26 @@ +import dlt + + +@dlt.source +def github(): + @dlt.resource( + table_name="issues__2", + primary_key="id", + ) + def load_issues(): + # return data with path separators + yield [ + { + "id": 100, + "issue__id": 10, + } + ] + + return load_issues + + +if __name__ == "__main__": + p = dlt.pipeline("dlt_github_pipeline", destination="duckdb", dataset_name="github_3") + github_source = github() + info = p.run(github_source) + print(info) diff --git a/tests/pipeline/test_dlt_versions.py b/tests/pipeline/test_dlt_versions.py index a3d8b489c9..fbd4d412b3 100644 --- a/tests/pipeline/test_dlt_versions.py +++ b/tests/pipeline/test_dlt_versions.py @@ -484,3 +484,59 @@ def test_scd2_pipeline_update(test_storage: FileStorage) -> None: assert len(issues_retired) == 1 assert issues_retired[0][0] == 6272 # print(pipeline.default_schema.to_pretty_yaml()) + + +def test_normalize_path_separator_legacy_behavior(test_storage: FileStorage) -> None: + """Pre 1.4.1 normalized identifiers with path separators into single underscore, + this behavior must be preserved if the schema is updated. 
+ """ + shutil.copytree("tests/pipeline/cases/github_pipeline", TEST_STORAGE_ROOT, dirs_exist_ok=True) + + # execute in test storage + with set_working_dir(TEST_STORAGE_ROOT): + # store dlt data in test storage (like patch_home_dir) + with custom_environ({DLT_DATA_DIR: dlt.current.run().data_dir}): + # save database outside of pipeline dir + with custom_environ( + {"DESTINATION__DUCKDB__CREDENTIALS": "duckdb:///test_github_3.duckdb"} + ): + venv_dir = tempfile.mkdtemp() + # create virtual env with dlt 0.3.0, a version before the current schema upgrade + with Venv.create(venv_dir, ["dlt[duckdb]==0.3.0"]) as venv: + venv._install_deps(venv.context, ["duckdb" + "==" + pkg_version("duckdb")]) + try: + print( + venv.run_script("../tests/pipeline/cases/github_pipeline/github_rev.py") + ) + except CalledProcessError as cpe: + print(f"script stdout: {cpe.stdout}") + print(f"script stderr: {cpe.stderr}") + raise + + venv = Venv.restore_current() + # load same data again + try: + print(venv.run_script("../tests/pipeline/cases/github_pipeline/github_rev.py")) + except CalledProcessError as cpe: + print(f"script stdout: {cpe.stdout}") + print(f"script stderr: {cpe.stderr}") + raise + pipeline = dlt.attach(GITHUB_PIPELINE_NAME) + print(pipeline.default_schema.to_pretty_yaml()) + # migration set the backward compat flag + assert ( + pipeline.default_schema._normalizers_config["use_break_path_on_normalize"] + is False + ) + # make sure that schema didn't change + assert pipeline.default_schema.data_table_names() == ["issues_2"] + table_ = pipeline.default_schema.tables["issues_2"] + assert set(table_["columns"].keys()) == { + "id", + "issue_id", + "_dlt_id", + "_dlt_load_id", + } + # datasets must be the same + data_ = pipeline._dataset().issues_2.select("issue_id", "id").fetchall() + print(data_) diff --git a/tests/normalize/test_max_nesting.py b/tests/pipeline/test_max_nesting.py similarity index 100% rename from tests/normalize/test_max_nesting.py rename to tests/pipeline/test_max_nesting.py diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 3832bad81a..e58db64e5e 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -1711,6 +1711,111 @@ def nested_resource(): assert pipeline.last_trace.last_normalize_info.row_counts["flattened_dict__values"] == 4 + +def test_column_name_with_break_path() -> None: + """Tests how normalization behaves for names with a break path, i.e. __ + all the names must be idempotent + """ + pipeline = dlt.pipeline(destination="duckdb", pipeline_name="breaking") + info = pipeline.run( + [{"example_custom_field__c": "custom", "reg_c": "c"}], table_name="custom__path" + ) + assert_load_info(info) + # table name was preserved + table = pipeline.default_schema.get_table("custom__path") + assert pipeline.default_schema.data_table_names() == ["custom__path"] + # column name was preserved + assert table["columns"]["example_custom_field__c"]["data_type"] == "text" + assert set(table["columns"]) == {"example_custom_field__c", "reg_c", "_dlt_id", "_dlt_load_id"} + + # get data + assert_data_table_counts(pipeline, {"custom__path": 1}) + # get data via dataset with dbapi + data_ = pipeline._dataset().custom__path[["example_custom_field__c", "reg_c"]].fetchall() + assert data_ == [("custom", "c")] + + +def test_column_name_with_break_path_legacy() -> None: + """Tests how normalization behaves for names with a break path, i.e. __ + in legacy mode table and column names were normalized as a single identifier + """ +
os.environ["SCHEMA__USE_BREAK_PATH_ON_NORMALIZE"] = "False" + pipeline = dlt.pipeline(destination="duckdb", pipeline_name="breaking") + info = pipeline.run( + [{"example_custom_field__c": "custom", "reg_c": "c"}], table_name="custom__path" + ) + assert_load_info(info) + # table name was contracted + table = pipeline.default_schema.get_table("custom_path") + assert pipeline.default_schema.data_table_names() == ["custom_path"] + # column name was contracted + assert table["columns"]["example_custom_field_c"]["data_type"] == "text" + assert set(table["columns"]) == {"example_custom_field_c", "reg_c", "_dlt_id", "_dlt_load_id"} + + # get data + assert_data_table_counts(pipeline, {"custom_path": 1}) + # get data via dataset with dbapi + data_ = pipeline._dataset().custom_path[["example_custom_field_c", "reg_c"]].fetchall() + assert data_ == [("custom", "c")] + + +def test_column_hint_with_break_path() -> None: + """From v 1.4.1 upwards the name normalizer is idempotent on break paths""" + now = cast(pendulum.DateTime, pendulum.parse("2024-11-29T10:10")) + + @dlt.resource( + name="flattened__dict", columns=[{"name": "value__timestamp", "data_type": "timestamp"}] + ) + def flattened_dict(): + for delta in range(4): + yield { + "delta": delta, + "value": {"timestamp": now.timestamp() + delta}, + } + + pipeline = dlt.pipeline(destination="duckdb") + info = pipeline.run(flattened_dict()) + assert_load_info(info) + + assert pipeline.default_schema.data_table_names() == ["flattened__dict"] + table = pipeline.default_schema.get_table("flattened__dict") + assert set(table["columns"]) == {"delta", "value__timestamp", "_dlt_id", "_dlt_load_id"} + assert table["columns"]["value__timestamp"]["data_type"] == "timestamp" + + # make sure data is there + data_ = pipeline._dataset().flattened__dict[["delta", "value__timestamp"]].limit(1).fetchall() + assert data_ == [(0, now)] + + +def test_column_hint_with_break_path_legacy() -> None: + """From v 1.4.1 upwards the name normalizer is idempotent on break paths""" + + os.environ["SCHEMA__USE_BREAK_PATH_ON_NORMALIZE"] = "False" + now = cast(pendulum.DateTime, pendulum.parse("2024-11-29T10:10")) + + @dlt.resource( + name="flattened__dict", columns=[{"name": "value__timestamp", "data_type": "timestamp"}] + ) + def flattened_dict(): + for delta in range(4): + yield { + "delta": delta, + "value": {"timestamp": now.timestamp() + delta}, + } + + pipeline = dlt.pipeline(destination="duckdb") + info = pipeline.run(flattened_dict()) + assert_load_info(info) + # table name contracted + assert pipeline.default_schema.data_table_names() == ["flattened_dict"] + table = pipeline.default_schema.get_table("flattened_dict") + # hint applied + assert set(table["columns"]) == {"delta", "value__timestamp", "_dlt_id", "_dlt_load_id"} + assert table["columns"]["value__timestamp"]["data_type"] == "timestamp" + # make sure data is there + data_ = pipeline._dataset().flattened_dict[["delta", "value__timestamp"]].limit(1).fetchall() + assert data_ == [(0, now)] + + def test_empty_rows_are_included() -> None: """Empty rows where all values are `None` or empty dicts create rows in the dataset with `NULL` in all columns From b4d807fc059591720f1ea14e73340e9a98041225 Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 2 Dec 2024 16:26:01 +0100 Subject: [PATCH 4/4] bumps to version 1.4.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8afb332422..7377b03fde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@
[tool.poetry] name = "dlt" -version = "1.4.1a1" +version = "1.4.1" description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run." authors = ["dltHub Inc. "] maintainers = [ "Marcin Rudolf ", "Adrian Brudaru ", "Anton Burnashev ", "David Scharf " ]