diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 35ccb71ab5..317124f8c8 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: get_docs_changes: uses: ./.github/workflows/get_docs_changes.yml @@ -17,9 +21,10 @@ jobs: needs: get_docs_changes if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' strategy: - fail-fast: false + fail-fast: true matrix: - os: ["ubuntu-latest", "macos-latest", "windows-latest"] + os: + - ubuntu-latest python-version: ["3.8.x", "3.9.x", "3.10.x", "3.11.x"] defaults: @@ -75,4 +80,4 @@ jobs: - name: Check matrix job results if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') run: | - echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." && exit 1 + echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." && exit 1 diff --git a/.github/workflows/test_airflow.yml b/.github/workflows/test_airflow.yml index bbed326344..02513618d6 100644 --- a/.github/workflows/test_airflow.yml +++ b/.github/workflows/test_airflow.yml @@ -7,6 +7,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: get_docs_changes: uses: ./.github/workflows/get_docs_changes.yml diff --git a/.github/workflows/test_build_images.yml b/.github/workflows/test_build_images.yml index 9668e23cb3..489d776f40 100644 --- a/.github/workflows/test_build_images.yml +++ b/.github/workflows/test_build_images.yml @@ -7,6 +7,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: get_docs_changes: uses: ./.github/workflows/get_docs_changes.yml diff --git a/.github/workflows/test_common.yml b/.github/workflows/test_common.yml index 2160025ea0..2d96d2eb95 100644 --- a/.github/workflows/test_common.yml +++ b/.github/workflows/test_common.yml @@ -1,4 +1,3 @@ - name: test common on: @@ -8,6 +7,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: RUNTIME__LOG_LEVEL: ERROR @@ -92,6 +95,19 @@ jobs: name: Run smoke tests with minimum deps Windows shell: cmd + - name: Install pyarrow + run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk + + - run: | + poetry run pytest tests/pipeline/test_pipeline_extra.py -k arrow + if: runner.os != 'Windows' + name: Run pipeline tests with pyarrow but no pandas installed + - run: | + poetry run pytest tests/pipeline/test_pipeline_extra.py -k arrow + if: runner.os == 'Windows' + name: Run pipeline tests with pyarrow but no pandas installed Windows + shell: cmd + - name: Install pipeline dependencies run: poetry install --no-interaction -E duckdb -E cli -E parquet --with sentry-sdk --with pipeline diff --git a/.github/workflows/test_dbt_cloud.yml b/.github/workflows/test_dbt_cloud.yml index 2d06ac96ba..a123e051e8 100644 --- a/.github/workflows/test_dbt_cloud.yml +++ b/.github/workflows/test_dbt_cloud.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: # all 
credentials must be present to be passed to dbt cloud DBT_CLOUD__ACCOUNT_ID: ${{ secrets.DBT_CLOUD__ACCOUNT_ID }} diff --git a/.github/workflows/test_dbt_runner.yml b/.github/workflows/test_dbt_runner.yml index 5ae791c979..1c425f14e9 100644 --- a/.github/workflows/test_dbt_runner.yml +++ b/.github/workflows/test_dbt_runner.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_destination_athena.yml b/.github/workflows/test_destination_athena.yml index e9e17edefe..b94bdc6ee2 100644 --- a/.github/workflows/test_destination_athena.yml +++ b/.github/workflows/test_destination_athena.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_destination_athena_iceberg.yml b/.github/workflows/test_destination_athena_iceberg.yml index d77e35f088..acb5f35dfd 100644 --- a/.github/workflows/test_destination_athena_iceberg.yml +++ b/.github/workflows/test_destination_athena_iceberg.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_destination_bigquery.yml b/.github/workflows/test_destination_bigquery.yml index 00027768a5..d11f7155d4 100644 --- a/.github/workflows/test_destination_bigquery.yml +++ b/.github/workflows/test_destination_bigquery.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_destination_databricks.yml b/.github/workflows/test_destination_databricks.yml index f301a1b9ed..2a2fa8e10d 100644 --- a/.github/workflows/test_destination_databricks.yml +++ b/.github/workflows/test_destination_databricks.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_destination_mssql.yml b/.github/workflows/test_destination_mssql.yml index d1da25c067..f96c64219d 100644 --- a/.github/workflows/test_destination_mssql.yml +++ b/.github/workflows/test_destination_mssql.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_destination_qdrant.yml b/.github/workflows/test_destination_qdrant.yml index 758c18b56b..3237801dbf 100644 --- a/.github/workflows/test_destination_qdrant.yml +++ b/.github/workflows/test_destination_qdrant.yml @@ -7,6 +7,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} @@ -28,7 +32,8 @@ jobs: strategy: fail-fast: false matrix: - os: 
["ubuntu-latest", "macos-latest", "windows-latest"] + os: + - ubuntu-latest defaults: run: shell: bash @@ -64,13 +69,7 @@ jobs: run: poetry install --no-interaction -E qdrant -E parquet --with sentry-sdk --with pipeline - run: | poetry run pytest tests/load/ - if: runner.os != 'Windows' - name: Run tests Linux/MAC - - run: | - poetry run pytest tests/load/ - if: runner.os == 'Windows' - name: Run tests Windows - shell: cmd + name: Run tests Linux matrix_job_required_check: name: Qdrant loader tests diff --git a/.github/workflows/test_destination_snowflake.yml b/.github/workflows/test_destination_snowflake.yml index 979ea3e917..1ef290682c 100644 --- a/.github/workflows/test_destination_snowflake.yml +++ b/.github/workflows/test_destination_snowflake.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_destination_synapse.yml b/.github/workflows/test_destination_synapse.yml index ecd890d32a..774c83314f 100644 --- a/.github/workflows/test_destination_synapse.yml +++ b/.github/workflows/test_destination_synapse.yml @@ -7,6 +7,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} @@ -24,7 +28,7 @@ jobs: run_loader: name: Tests Synapse loader needs: get_docs_changes - if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' + if: needs.get_docs_changes.outputs.changes_outside_docs == 'true' strategy: fail-fast: false matrix: diff --git a/.github/workflows/test_destinations.yml b/.github/workflows/test_destinations.yml index c60d870b05..a635e2865c 100644 --- a/.github/workflows/test_destinations.yml +++ b/.github/workflows/test_destinations.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/.github/workflows/test_doc_snippets.yml b/.github/workflows/test_doc_snippets.yml index d73c109894..7a862c5800 100644 --- a/.github/workflows/test_doc_snippets.yml +++ b/.github/workflows/test_doc_snippets.yml @@ -8,6 +8,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} @@ -54,7 +58,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant --with docs,sentry-sdk --without airflow + run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant -E bigquery --with docs,sentry-sdk --without airflow - name: create secrets.toml run: pwd && echo "$DLT_SECRETS_TOML" > docs/website/docs/.dlt/secrets.toml diff --git a/.github/workflows/test_local_destinations.yml b/.github/workflows/test_local_destinations.yml index a02957b69d..11377095d0 100644 --- a/.github/workflows/test_local_destinations.yml +++ b/.github/workflows/test_local_destinations.yml @@ -10,6 +10,10 @@ on: - devel workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + 
env: DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index afd0a00d4a..895ad08229 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,6 +12,15 @@ Thank you for considering contributing to **dlt**! We appreciate your help in ma 6. [Publishing (Maintainers Only)](#publishing-maintainers-only) 7. [Resources](#resources) +## Before You Begin + +- **Proposing significant changes or enhancements**: If you're thinking about making significant changes, make sure to [submit an issue](https://github.com/dlt-hub/dlt/issues/new/choose) first. This ensures your efforts align with the project's direction and that you don't invest time on a feature that may not be merged. + +- **Fixing bugs**: + - **Check existing issues**: search [open issues](https://github.com/dlt-hub/dlt/issues) to see if the bug you've found is already reported. + - If **not reported**, [create a new issue](https://github.com/dlt-hub/dlt/issues/new/choose). You're more than welcome to fix it and submit a pull request with your solution. Thank you! + - If the bug is **already reported**, please leave a comment on that issue stating you're working on fixing it. This helps keep everyone updated and avoids duplicate efforts. + ## Getting Started To get started, follow these steps: diff --git a/Makefile b/Makefile index 5aa2b2786c..ebf633d1eb 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,8 @@ dev: has-poetry poetry install --all-extras --with airflow --with docs --with providers --with pipeline --with sentry-sdk lint: - ./check-package.sh + ./tools/check-package.sh + poetry run python ./tools/check-lockfile.py poetry run mypy --config-file mypy.ini dlt tests poetry run flake8 --max-line-length=200 dlt poetry run flake8 --max-line-length=200 tests --exclude tests/reflection/module_cases @@ -60,8 +61,9 @@ format: # poetry run isort ./ test-and-lint-snippets: - poetry run mypy --config-file mypy.ini docs/website docs/examples - poetry run flake8 --max-line-length=200 docs/website docs/examples + cd docs/tools && poetry run python check_embedded_snippets.py full + poetry run mypy --config-file mypy.ini docs/website docs/examples docs/tools --exclude docs/tools/lint_setup + poetry run flake8 --max-line-length=200 docs/website docs/examples docs/tools cd docs/website/docs && poetry run pytest --ignore=node_modules lint-security: diff --git a/README.md b/README.md index 60c5c2f385..5cb681c570 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ Load chess game data from chess.com API and save it in DuckDB: ```python import dlt from dlt.sources.helpers import requests + # Create a dlt pipeline that will load # chess player data to the DuckDB destination pipeline = dlt.pipeline( @@ -48,12 +49,14 @@ pipeline = dlt.pipeline( destination='duckdb', dataset_name='player_data' ) + # Grab some player data from Chess.com API data = [] for player in ['magnuscarlsen', 'rpragchess']: response = requests.get(f'https://api.chess.com/pub/player/{player}') response.raise_for_status() data.append(response.json()) + # Extract, normalize, and load the data pipeline.run(data, table_name='player') ``` diff --git a/dlt/__init__.py b/dlt/__init__.py index e2a6b1a3a7..eee105e47e 100644 --- a/dlt/__init__.py +++ b/dlt/__init__.py @@ -29,6 +29,8 @@ from dlt import sources from dlt.extract.decorators import source, resource, transformer, defer +from dlt.destinations.decorators import destination + from dlt.pipeline import ( pipeline as _pipeline, run, @@ -62,6 +64,7 @@ "resource", "transformer", "defer", + 
"destination", "pipeline", "run", "attach", diff --git a/dlt/cli/_dlt.py b/dlt/cli/_dlt.py index 9894227046..2332c0286c 100644 --- a/dlt/cli/_dlt.py +++ b/dlt/cli/_dlt.py @@ -443,6 +443,12 @@ def main() -> int: pipe_cmd.add_argument( "--list-pipelines", "-l", default=False, action="store_true", help="List local pipelines" ) + pipe_cmd.add_argument( + "--hot-reload", + default=False, + action="store_true", + help="Reload streamlit app (for core development)", + ) pipe_cmd.add_argument("pipeline_name", nargs="?", help="Pipeline name") pipe_cmd.add_argument("--pipelines-dir", help="Pipelines working directory", default=None) pipe_cmd.add_argument( diff --git a/dlt/cli/pipeline_command.py b/dlt/cli/pipeline_command.py index 9981fa8493..0eb73ad7a8 100644 --- a/dlt/cli/pipeline_command.py +++ b/dlt/cli/pipeline_command.py @@ -1,5 +1,5 @@ import yaml -from typing import Any, Sequence, Tuple +from typing import Any, Optional, Sequence, Tuple import dlt from dlt.cli.exceptions import CliCommandException @@ -15,6 +15,7 @@ from dlt.cli import echo as fmt + DLT_PIPELINE_COMMAND_DOCS_URL = "https://dlthub.com/docs/reference/command-line-interface" @@ -25,6 +26,7 @@ def pipeline_command( verbosity: int, dataset_name: str = None, destination: TDestinationReferenceArg = None, + hot_reload: Optional[bool] = False, **command_kwargs: Any, ) -> None: if operation == "list": @@ -48,7 +50,8 @@ def pipeline_command( raise fmt.warning(str(e)) if not fmt.confirm( - "Do you want to attempt to restore the pipeline state from destination?", default=False + "Do you want to attempt to restore the pipeline state from destination?", + default=False, ): return destination = destination or fmt.text_input( @@ -58,7 +61,10 @@ def pipeline_command( f"Enter dataset name for pipeline {fmt.bold(pipeline_name)}" ) p = dlt.pipeline( - pipeline_name, pipelines_dir, destination=destination, dataset_name=dataset_name + pipeline_name, + pipelines_dir, + destination=destination, + dataset_name=dataset_name, ) p.sync_destination() if p.first_run: @@ -101,13 +107,29 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: if operation == "show": from dlt.common.runtime import signals - from dlt.helpers import streamlit_helper + from dlt.helpers.streamlit_app import index with signals.delayed_signals(): + streamlit_cmd = [ + "streamlit", + "run", + index.__file__, + "--client.showSidebarNavigation", + "false", + ] + + if hot_reload: + streamlit_cmd.append("--server.runOnSave") + streamlit_cmd.append("true") + + streamlit_cmd.append("--") + streamlit_cmd.append(pipeline_name) + if pipelines_dir: + streamlit_cmd.append("--pipelines-dir") + streamlit_cmd.append(pipelines_dir) + venv = Venv.restore_current() - for line in iter_stdout( - venv, "streamlit", "run", streamlit_helper.__file__, pipeline_name - ): + for line in iter_stdout(venv, *streamlit_cmd): fmt.echo(line) if operation == "info": @@ -255,7 +277,12 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: tables = remove_defaults({"tables": package_info.schema_update}) # type: ignore fmt.echo(fmt.bold("Schema update:")) fmt.echo( - yaml.dump(tables, allow_unicode=True, default_flow_style=False, sort_keys=False) + yaml.dump( + tables, + allow_unicode=True, + default_flow_style=False, + sort_keys=False, + ) ) if operation == "schema": @@ -288,20 +315,33 @@ def _display_pending_packages() -> Tuple[Sequence[str], Sequence[str]]: fmt.echo( "About to drop the following data in dataset %s in destination %s:" - % (fmt.bold(drop.info["dataset_name"]), 
fmt.bold(p.destination.destination_name)) + % ( + fmt.bold(drop.info["dataset_name"]), + fmt.bold(p.destination.destination_name), + ) ) fmt.echo("%s: %s" % (fmt.style("Selected schema", fg="green"), drop.info["schema_name"])) fmt.echo( - "%s: %s" % (fmt.style("Selected resource(s)", fg="green"), drop.info["resource_names"]) + "%s: %s" + % ( + fmt.style("Selected resource(s)", fg="green"), + drop.info["resource_names"], + ) ) fmt.echo("%s: %s" % (fmt.style("Table(s) to drop", fg="green"), drop.info["tables"])) fmt.echo( "%s: %s" - % (fmt.style("Resource(s) state to reset", fg="green"), drop.info["resource_states"]) + % ( + fmt.style("Resource(s) state to reset", fg="green"), + drop.info["resource_states"], + ) ) fmt.echo( "%s: %s" - % (fmt.style("Source state path(s) to reset", fg="green"), drop.info["state_paths"]) + % ( + fmt.style("Source state path(s) to reset", fg="green"), + drop.info["state_paths"], + ) ) # for k, v in drop.info.items(): # fmt.echo("%s: %s" % (fmt.style(k, fg="green"), v)) diff --git a/dlt/common/configuration/__init__.py b/dlt/common/configuration/__init__.py index b7d868ff8b..8de57f7799 100644 --- a/dlt/common/configuration/__init__.py +++ b/dlt/common/configuration/__init__.py @@ -1,7 +1,7 @@ from .specs.base_configuration import configspec, is_valid_hint, is_secret_hint, resolve_type from .specs import known_sections from .resolve import resolve_configuration, inject_section -from .inject import with_config, last_config, get_fun_spec +from .inject import with_config, last_config, get_fun_spec, create_resolved_partial from .exceptions import ( ConfigFieldMissingException, diff --git a/dlt/common/configuration/container.py b/dlt/common/configuration/container.py index ad20765489..441b0e21bc 100644 --- a/dlt/common/configuration/container.py +++ b/dlt/common/configuration/container.py @@ -1,7 +1,7 @@ -from contextlib import contextmanager +from contextlib import contextmanager, nullcontext, AbstractContextManager import re import threading -from typing import ClassVar, Dict, Iterator, Tuple, Type, TypeVar +from typing import ClassVar, Dict, Iterator, Tuple, Type, TypeVar, Any from dlt.common.configuration.specs.base_configuration import ContainerInjectableContext from dlt.common.configuration.exceptions import ( @@ -34,6 +34,9 @@ class Container: thread_contexts: Dict[int, Dict[Type[ContainerInjectableContext], ContainerInjectableContext]] """A thread aware mapping of injection context """ + _context_container_locks: Dict[str, threading.Lock] + """Locks for container types on threads.""" + main_context: Dict[Type[ContainerInjectableContext], ContainerInjectableContext] """Injection context for the main thread""" @@ -41,6 +44,7 @@ def __new__(cls: Type["Container"]) -> "Container": if not cls._INSTANCE: cls._INSTANCE = super().__new__(cls) cls._INSTANCE.thread_contexts = {} + cls._INSTANCE._context_container_locks = {} cls._INSTANCE.main_context = cls._INSTANCE.thread_contexts[ Container._MAIN_THREAD_ID ] = {} @@ -84,22 +88,22 @@ def _thread_context( self, spec: Type[TConfiguration] ) -> Dict[Type[ContainerInjectableContext], ContainerInjectableContext]: if spec.global_affinity: - context = self.main_context + return self.main_context else: # thread pool names used in dlt contain originating thread id. 
use this id over pool id if m := re.match(r"dlt-pool-(\d+)-", threading.currentThread().getName()): thread_id = int(m.group(1)) else: thread_id = threading.get_ident() + # return main context for main thread if thread_id == Container._MAIN_THREAD_ID: return self.main_context # we may add a new empty thread context so lock here with Container._LOCK: - context = self.thread_contexts.get(thread_id) - if context is None: + if (context := self.thread_contexts.get(thread_id)) is None: context = self.thread_contexts[thread_id] = {} - return context + return context def _thread_getitem( self, spec: Type[TConfiguration] @@ -127,29 +131,44 @@ def _thread_delitem( del context[spec] @contextmanager - def injectable_context(self, config: TConfiguration) -> Iterator[TConfiguration]: + def injectable_context( + self, config: TConfiguration, lock_context: bool = False + ) -> Iterator[TConfiguration]: """A context manager that will insert `config` into the container and restore the previous value when it gets out of scope.""" + config.resolve() spec = type(config) previous_config: ContainerInjectableContext = None - context, previous_config = self._thread_getitem(spec) - - # set new config and yield context - self._thread_setitem(context, spec, config) - try: - yield config - finally: - # before setting the previous config for given spec, check if there was no overlapping modification - context, current_config = self._thread_getitem(spec) - if current_config is config: - # config is injected for spec so restore previous - if previous_config is None: - self._thread_delitem(context, spec) + context = self._thread_context(spec) + lock: AbstractContextManager[Any] + + # if there is a lock_id, we need a lock for the lock_id in the scope of the current context + if lock_context: + lock_key = f"{id(context)}" + if (lock := self._context_container_locks.get(lock_key)) is None: + with Container._LOCK: + self._context_container_locks[lock_key] = lock = threading.Lock() + else: + lock = nullcontext() + + with lock: + # remember context and set item + previous_config = context.get(spec) + self._thread_setitem(context, spec, config) + try: + yield config + finally: + # before setting the previous config for given spec, check if there was no overlapping modification + context, current_config = self._thread_getitem(spec) + if current_config is config: + # config is injected for spec so restore previous + if previous_config is None: + self._thread_delitem(context, spec) + else: + self._thread_setitem(context, spec, previous_config) else: - self._thread_setitem(context, spec, previous_config) - else: - # value was modified in the meantime and not restored - raise ContainerInjectableContextMangled(spec, context[spec], config) + # value was modified in the meantime and not restored + raise ContainerInjectableContextMangled(spec, context[spec], config) @staticmethod def thread_pool_prefix() -> str: diff --git a/dlt/common/configuration/inject.py b/dlt/common/configuration/inject.py index a22f299ae8..6699826ec8 100644 --- a/dlt/common/configuration/inject.py +++ b/dlt/common/configuration/inject.py @@ -1,12 +1,15 @@ import inspect + from functools import wraps -from typing import Callable, Dict, Type, Any, Optional, Tuple, TypeVar, overload +from typing import Callable, Dict, Type, Any, Optional, Tuple, TypeVar, overload, cast from inspect import Signature, Parameter +from contextlib import nullcontext from dlt.common.typing import DictStrAny, StrAny, TFun, AnyFun from dlt.common.configuration.resolve import 
resolve_configuration, inject_section from dlt.common.configuration.specs.base_configuration import BaseConfiguration from dlt.common.configuration.specs.config_section_context import ConfigSectionContext + from dlt.common.reflection.spec import spec_from_signature @@ -32,6 +35,9 @@ def with_config( auto_pipeline_section: bool = False, include_defaults: bool = True, accept_partial: bool = False, + initial_config: BaseConfiguration = None, + base: Type[BaseConfiguration] = BaseConfiguration, + lock_context_on_injection: bool = True, ) -> TFun: ... @@ -45,6 +51,9 @@ def with_config( auto_pipeline_section: bool = False, include_defaults: bool = True, accept_partial: bool = False, + initial_config: Optional[BaseConfiguration] = None, + base: Type[BaseConfiguration] = BaseConfiguration, + lock_context_on_injection: bool = True, ) -> Callable[[TFun], TFun]: ... @@ -58,6 +67,8 @@ def with_config( include_defaults: bool = True, accept_partial: bool = False, initial_config: Optional[BaseConfiguration] = None, + base: Type[BaseConfiguration] = BaseConfiguration, + lock_context_on_injection: bool = True, ) -> Callable[[TFun], TFun]: """Injects values into decorated function arguments following the specification in `spec` or by deriving one from function's signature. @@ -71,10 +82,12 @@ def with_config( prefer_existing_sections: (bool, optional): When joining existing section context, the existing context will be preferred to the one in `sections`. Default: False auto_pipeline_section (bool, optional): If True, a top level pipeline section will be added if `pipeline_name` argument is present . Defaults to False. include_defaults (bool, optional): If True then arguments with default values will be included in synthesized spec. If False only the required arguments marked with `dlt.secrets.value` and `dlt.config.value` are included - + base (Type[BaseConfiguration], optional): A base class for synthesized spec. Defaults to BaseConfiguration. + lock_context_on_injection (bool, optional): If True, the thread context will be locked during injection to prevent race conditions. Defaults to True. 
Returns: Callable[[TFun], TFun]: A decorated function """ + section_f: Callable[[StrAny], str] = None # section may be a function from function arguments to section if callable(sections): @@ -83,20 +96,25 @@ def with_config( def decorator(f: TFun) -> TFun: SPEC: Type[BaseConfiguration] = None sig: Signature = inspect.signature(f) + signature_fields: Dict[str, Any] kwargs_arg = next( (p for p in sig.parameters.values() if p.kind == Parameter.VAR_KEYWORD), None ) - spec_arg: Parameter = None - pipeline_name_arg: Parameter = None - if spec is None: - SPEC = spec_from_signature(f, sig, include_defaults) + SPEC, signature_fields = spec_from_signature(f, sig, include_defaults, base=base) else: SPEC = spec + signature_fields = SPEC.get_resolvable_fields() - if SPEC is None: + # if no signature fields were added we will not wrap `f` for injection + if len(signature_fields) == 0: + # always register new function + _FUNC_SPECS[id(f)] = SPEC return f + spec_arg: Parameter = None + pipeline_name_arg: Parameter = None + for p in sig.parameters.values(): # for all positional parameters that do not have default value, set default # if hasattr(SPEC, p.name) and p.default == Parameter.empty: @@ -109,49 +127,52 @@ def decorator(f: TFun) -> TFun: pipeline_name_arg = p pipeline_name_arg_default = None if p.default == Parameter.empty else p.default - @wraps(f) - def _wrap(*args: Any, **kwargs: Any) -> Any: + def resolve_config(bound_args: inspect.BoundArguments) -> BaseConfiguration: + """Resolve arguments using the provided spec""" # bind parameters to signature - bound_args = sig.bind(*args, **kwargs) # for calls containing resolved spec in the kwargs, we do not need to resolve again config: BaseConfiguration = None - if _LAST_DLT_CONFIG in kwargs: - config = last_config(**kwargs) + + # if section derivation function was provided then call it + if section_f: + curr_sections: Tuple[str, ...] = (section_f(bound_args.arguments),) + # sections may be a string + elif isinstance(sections, str): + curr_sections = (sections,) else: - # if section derivation function was provided then call it - if section_f: - curr_sections: Tuple[str, ...] 
= (section_f(bound_args.arguments),) - # sections may be a string - elif isinstance(sections, str): - curr_sections = (sections,) - else: - curr_sections = sections - - # if one of arguments is spec the use it as initial value - if initial_config: - config = initial_config - elif spec_arg: - config = bound_args.arguments.get(spec_arg.name, None) - # resolve SPEC, also provide section_context with pipeline_name - if pipeline_name_arg: - curr_pipeline_name = bound_args.arguments.get( - pipeline_name_arg.name, pipeline_name_arg_default - ) - else: - curr_pipeline_name = None - section_context = ConfigSectionContext( - pipeline_name=curr_pipeline_name, - sections=curr_sections, - merge_style=sections_merge_style, + curr_sections = sections + + # if one of arguments is spec the use it as initial value + if initial_config: + config = initial_config + elif spec_arg: + config = bound_args.arguments.get(spec_arg.name, None) + # resolve SPEC, also provide section_context with pipeline_name + if pipeline_name_arg: + curr_pipeline_name = bound_args.arguments.get( + pipeline_name_arg.name, pipeline_name_arg_default ) - # this may be called from many threads so section_context is thread affine - with inject_section(section_context): - # print(f"RESOLVE CONF in inject: {f.__name__}: {section_context.sections} vs {sections}") - config = resolve_configuration( - config or SPEC(), - explicit_value=bound_args.arguments, - accept_partial=accept_partial, - ) + else: + curr_pipeline_name = None + section_context = ConfigSectionContext( + pipeline_name=curr_pipeline_name, + sections=curr_sections, + merge_style=sections_merge_style, + ) + + # this may be called from many threads so section_context is thread affine + with inject_section(section_context, lock_context=lock_context_on_injection): + # print(f"RESOLVE CONF in inject: {f.__name__}: {section_context.sections} vs {sections}") + return resolve_configuration( + config or SPEC(), + explicit_value=bound_args.arguments, + accept_partial=accept_partial, + ) + + def update_bound_args( + bound_args: inspect.BoundArguments, config: BaseConfiguration, args: Any, kwargs: Any + ) -> None: + # overwrite or add resolved params resolved_params = dict(config) # overwrite or add resolved params for p in sig.parameters.values(): @@ -167,12 +188,56 @@ def _wrap(*args: Any, **kwargs: Any) -> Any: bound_args.arguments[kwargs_arg.name].update(resolved_params) bound_args.arguments[kwargs_arg.name][_LAST_DLT_CONFIG] = config bound_args.arguments[kwargs_arg.name][_ORIGINAL_ARGS] = (args, kwargs) + + def with_partially_resolved_config(config: Optional[BaseConfiguration] = None) -> Any: + # creates a pre-resolved partial of the decorated function + empty_bound_args = sig.bind_partial() + if not config: + config = resolve_config(empty_bound_args) + + def wrapped(*args: Any, **kwargs: Any) -> Any: + nonlocal config + + # Do we need an exception here? + if spec_arg and spec_arg.name in kwargs: + from dlt.common import logger + + logger.warning( + "Spec argument is provided in kwargs, ignoring it for resolved partial" + " function." 
+ ) + + # we can still overwrite the config + if _LAST_DLT_CONFIG in kwargs: + config = last_config(**kwargs) + + # call the function with the pre-resolved config + bound_args = sig.bind(*args, **kwargs) + update_bound_args(bound_args, config, args, kwargs) + return f(*bound_args.args, **bound_args.kwargs) + + return wrapped + + @wraps(f) + def _wrap(*args: Any, **kwargs: Any) -> Any: + # Resolve config + config: BaseConfiguration = None + bound_args = sig.bind(*args, **kwargs) + if _LAST_DLT_CONFIG in kwargs: + config = last_config(**kwargs) + else: + config = resolve_config(bound_args) + # call the function with resolved config + update_bound_args(bound_args, config, args, kwargs) return f(*bound_args.args, **bound_args.kwargs) # register the spec for a wrapped function _FUNC_SPECS[id(_wrap)] = SPEC + # add a method to create a pre-resolved partial + setattr(_wrap, "__RESOLVED_PARTIAL_FUNC__", with_partially_resolved_config) # noqa: B010 + return _wrap # type: ignore # See if we're being called as @with_config or @with_config(). @@ -197,3 +262,10 @@ def last_config(**kwargs: Any) -> Any: def get_orig_args(**kwargs: Any) -> Tuple[Tuple[Any], DictStrAny]: return kwargs[_ORIGINAL_ARGS] # type: ignore + + +def create_resolved_partial(f: AnyFun, config: Optional[BaseConfiguration] = None) -> AnyFun: + """Create a pre-resolved partial of the with_config decorated function""" + if partial_func := getattr(f, "__RESOLVED_PARTIAL_FUNC__", None): + return cast(AnyFun, partial_func(config)) + return f diff --git a/dlt/common/configuration/resolve.py b/dlt/common/configuration/resolve.py index db69cd9572..b398f0463a 100644 --- a/dlt/common/configuration/resolve.py +++ b/dlt/common/configuration/resolve.py @@ -92,13 +92,14 @@ def initialize_credentials(hint: Any, initial_value: Any) -> CredentialsConfigur def inject_section( - section_context: ConfigSectionContext, merge_existing: bool = True + section_context: ConfigSectionContext, merge_existing: bool = True, lock_context: bool = False ) -> ContextManager[ConfigSectionContext]: """Context manager that sets section specified in `section_context` to be used during configuration resolution. Optionally merges the context already in the container with the one provided Args: section_context (ConfigSectionContext): Instance providing a pipeline name and section context merge_existing (bool, optional): Merges existing section context with `section_context` in the arguments by executing `merge_style` function on `section_context`. Defaults to True. + lock_context (bool, optional): Instruct to threadlock the current thread to prevent race conditions in context injection. Default Merge Style: Gets `pipeline_name` and `sections` from existing context if they are not provided in `section_context` argument. 
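A minimal sketch of how the pre-resolved partial added above could be used. The source function `fetch_rows`, its section name, and its config fields are hypothetical; only `with_config` and `create_resolved_partial` come from this diff, and the call assumes the required values are present in config/secrets.

```python
import dlt
from dlt.common.configuration import with_config, create_resolved_partial


@with_config(sections=("sources", "example"))
def fetch_rows(api_url: str = dlt.config.value, api_key: str = dlt.secrets.value) -> list:
    # api_url and api_key are injected from the config providers on each call
    return [{"url": api_url, "authorized": bool(api_key)}]


# resolve the configuration once and reuse the partial,
# skipping per-call config resolution on hot code paths
fetch_rows_resolved = create_resolved_partial(fetch_rows)
rows = fetch_rows_resolved()
```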
@@ -112,7 +113,7 @@ def inject_section( if merge_existing: section_context.merge(existing_context) - return container.injectable_context(section_context) + return container.injectable_context(section_context, lock_context=lock_context) def _maybe_parse_native_value( diff --git a/dlt/common/configuration/specs/base_configuration.py b/dlt/common/configuration/specs/base_configuration.py index 84f59fa894..62abf42f27 100644 --- a/dlt/common/configuration/specs/base_configuration.py +++ b/dlt/common/configuration/specs/base_configuration.py @@ -2,6 +2,7 @@ import inspect import contextlib import dataclasses + from collections.abc import Mapping as C_Mapping from typing import ( Callable, diff --git a/dlt/common/data_types/type_helpers.py b/dlt/common/data_types/type_helpers.py index 659b4951df..61a0aa1dbf 100644 --- a/dlt/common/data_types/type_helpers.py +++ b/dlt/common/data_types/type_helpers.py @@ -7,7 +7,7 @@ from enum import Enum from dlt.common import pendulum, json, Decimal, Wei -from dlt.common.json import custom_pua_remove +from dlt.common.json import custom_pua_remove, json from dlt.common.json._simplejson import custom_encode as json_custom_encode from dlt.common.arithmetics import InvalidOperation from dlt.common.data_types.typing import TDataType @@ -105,6 +105,14 @@ def coerce_value(to_type: TDataType, from_type: TDataType, value: Any) -> Any: return int(value.value) return value + if to_type == "complex": + # try to coerce from text + if from_type == "text": + try: + return json.loads(value) + except Exception: + raise ValueError(value) + if to_type == "text": if from_type == "complex": return complex_to_str(value) diff --git a/dlt/common/destination/capabilities.py b/dlt/common/destination/capabilities.py index a78a31fdf3..36a9cc3b6e 100644 --- a/dlt/common/destination/capabilities.py +++ b/dlt/common/destination/capabilities.py @@ -19,7 +19,7 @@ ] ALL_SUPPORTED_FILE_FORMATS: Set[TLoaderFileFormat] = set(get_args(TLoaderFileFormat)) # file formats used internally by dlt -INTERNAL_LOADER_FILE_FORMATS: Set[TLoaderFileFormat] = {"puae-jsonl", "sql", "reference", "arrow"} +INTERNAL_LOADER_FILE_FORMATS: Set[TLoaderFileFormat] = {"sql", "reference", "arrow"} # file formats that may be chosen by the user EXTERNAL_LOADER_FILE_FORMATS: Set[TLoaderFileFormat] = ( set(get_args(TLoaderFileFormat)) - INTERNAL_LOADER_FILE_FORMATS @@ -55,6 +55,7 @@ class DestinationCapabilitiesContext(ContainerInjectableContext): insert_values_writer_type: str = "default" supports_multiple_statements: bool = True supports_clone_table: bool = False + max_table_nesting: Optional[int] = None # destination can overwrite max table nesting """Destination supports CREATE TABLE ... CLONE ... 
statements""" # do not allow to create default value, destination caps must be always explicitly inserted into container diff --git a/dlt/common/destination/reference.py b/dlt/common/destination/reference.py index 5e698347e5..258efd80be 100644 --- a/dlt/common/destination/reference.py +++ b/dlt/common/destination/reference.py @@ -260,6 +260,27 @@ def create_followup_jobs(self, final_state: TLoadJobState) -> List[NewLoadJob]: return [] +class DoNothingJob(LoadJob): + """The most lazy class of dlt""" + + def __init__(self, file_path: str) -> None: + super().__init__(FileStorage.get_file_name_from_file_path(file_path)) + + def state(self) -> TLoadJobState: + # this job is always done + return "completed" + + def exception(self) -> str: + # this part of code should be never reached + raise NotImplementedError() + + +class DoNothingFollowupJob(DoNothingJob, FollowupJob): + """The second most lazy class of dlt""" + + pass + + class JobClientBase(ABC): capabilities: ClassVar[DestinationCapabilitiesContext] = None diff --git a/dlt/common/libs/pandas.py b/dlt/common/libs/pandas.py index 93e6b764bc..7a94dcf6e2 100644 --- a/dlt/common/libs/pandas.py +++ b/dlt/common/libs/pandas.py @@ -1,7 +1,14 @@ +from typing import Any from dlt.common.exceptions import MissingDependencyException try: import pandas - from pandas.io.sql import _wrap_result except ModuleNotFoundError: raise MissingDependencyException("DLT Pandas Helpers", ["pandas"]) + + +def pandas_to_arrow(df: pandas.DataFrame) -> Any: + """Converts pandas to arrow or raises an exception if pyarrow is not installed""" + from dlt.common.libs.pyarrow import pyarrow as pa + + return pa.Table.from_pandas(df) diff --git a/dlt/common/libs/pandas_sql.py b/dlt/common/libs/pandas_sql.py new file mode 100644 index 0000000000..e9e2a7da11 --- /dev/null +++ b/dlt/common/libs/pandas_sql.py @@ -0,0 +1,7 @@ +from dlt.common.exceptions import MissingDependencyException + + +try: + from pandas.io.sql import _wrap_result +except ModuleNotFoundError: + raise MissingDependencyException("dlt pandas helper for sql", ["pandas"]) diff --git a/dlt/common/libs/pyarrow.py b/dlt/common/libs/pyarrow.py index 183c27954b..c1fbfbff85 100644 --- a/dlt/common/libs/pyarrow.py +++ b/dlt/common/libs/pyarrow.py @@ -18,7 +18,9 @@ import pyarrow.compute except ModuleNotFoundError: raise MissingDependencyException( - "dlt parquet Helpers", [f"{version.DLT_PKG_NAME}[parquet]"], "dlt Helpers for for parquet." 
+ "dlt pyarrow helpers", + [f"{version.DLT_PKG_NAME}[parquet]"], + "Install pyarrow to be allow to load arrow tables, panda frames and to use parquet files.", ) diff --git a/dlt/common/normalizers/configuration.py b/dlt/common/normalizers/configuration.py index 6957417f9d..adeefe2237 100644 --- a/dlt/common/normalizers/configuration.py +++ b/dlt/common/normalizers/configuration.py @@ -5,7 +5,7 @@ from dlt.common.configuration.specs import BaseConfiguration from dlt.common.destination import DestinationCapabilitiesContext from dlt.common.normalizers.typing import TJSONNormalizer -from dlt.common.typing import StrAny +from dlt.common.typing import DictStrAny @configspec @@ -14,7 +14,7 @@ class NormalizersConfiguration(BaseConfiguration): __section__: str = "schema" naming: Optional[str] = None - json_normalizer: Optional[StrAny] = None + json_normalizer: Optional[DictStrAny] = None destination_capabilities: Optional[DestinationCapabilitiesContext] = None # injectable def on_resolved(self) -> None: @@ -22,6 +22,16 @@ def on_resolved(self) -> None: if self.naming is None: if self.destination_capabilities: self.naming = self.destination_capabilities.naming_convention + # if max_table_nesting is set, we need to set the max_table_nesting in the json_normalizer + if ( + self.destination_capabilities + and self.destination_capabilities.max_table_nesting is not None + ): + self.json_normalizer = self.json_normalizer or {} + self.json_normalizer.setdefault("config", {}) + self.json_normalizer["config"][ + "max_nesting" + ] = self.destination_capabilities.max_table_nesting if TYPE_CHECKING: diff --git a/dlt/common/normalizers/utils.py b/dlt/common/normalizers/utils.py index dde78edede..645bad2bea 100644 --- a/dlt/common/normalizers/utils.py +++ b/dlt/common/normalizers/utils.py @@ -34,9 +34,11 @@ def import_normalizers( """ # add defaults to normalizer_config normalizers_config["names"] = names = normalizers_config["names"] or "snake_case" - normalizers_config["json"] = item_normalizer = normalizers_config["json"] or { - "module": "dlt.common.normalizers.json.relational" - } + # set default json normalizer module + normalizers_config["json"] = item_normalizer = normalizers_config.get("json") or {} + if "module" not in item_normalizer: + item_normalizer["module"] = "dlt.common.normalizers.json.relational" + try: if "." in names: # TODO: bump schema engine version and migrate schema. 
also change the name in TNormalizersConfig from names to naming diff --git a/dlt/common/pipeline.py b/dlt/common/pipeline.py index df221ec703..3cbaafefbe 100644 --- a/dlt/common/pipeline.py +++ b/dlt/common/pipeline.py @@ -3,6 +3,7 @@ import datetime # noqa: 251 import humanize import contextlib + from typing import ( Any, Callable, @@ -40,11 +41,15 @@ from dlt.common.schema.typing import TColumnNames, TColumnSchema, TWriteDisposition, TSchemaContract from dlt.common.source import get_current_pipe_name from dlt.common.storages.load_storage import LoadPackageInfo +from dlt.common.storages.load_package import PackageStorage + from dlt.common.time import ensure_pendulum_datetime, precise_time from dlt.common.typing import DictStrAny, REPattern, StrAny, SupportsHumanize from dlt.common.jsonpath import delete_matches, TAnyJsonPath from dlt.common.data_writers.writers import DataWriterMetrics, TLoaderFileFormat from dlt.common.utils import RowCounts, merge_row_counts +from dlt.common.versioned_state import TVersionedState +from dlt.common.storages.load_package import TLoadPackageState class _StepInfo(NamedTuple): @@ -454,7 +459,7 @@ class TPipelineLocalState(TypedDict, total=False): """Hash of state that was recently synced with destination""" -class TPipelineState(TypedDict, total=False): +class TPipelineState(TVersionedState, total=False): """Schema for a pipeline state that is stored within the pipeline working directory""" pipeline_name: str @@ -469,9 +474,6 @@ class TPipelineState(TypedDict, total=False): staging_type: Optional[str] # properties starting with _ are not automatically applied to pipeline object when state is restored - _state_version: int - _version_hash: str - _state_engine_version: int _local: TPipelineLocalState """A section of state that is not synchronized with the destination and does not participate in change merging and version control""" diff --git a/dlt/common/reflection/spec.py b/dlt/common/reflection/spec.py index 0a486088c8..5c39199f63 100644 --- a/dlt/common/reflection/spec.py +++ b/dlt/common/reflection/spec.py @@ -1,6 +1,6 @@ import re import inspect -from typing import Dict, List, Type, Any, Optional, NewType +from typing import Dict, List, Tuple, Type, Any, Optional, NewType from inspect import Signature, Parameter from dlt.common.typing import AnyType, AnyFun, TSecretValue @@ -26,15 +26,31 @@ def _first_up(s: str) -> str: def spec_from_signature( - f: AnyFun, sig: Signature, include_defaults: bool = True -) -> Type[BaseConfiguration]: + f: AnyFun, + sig: Signature, + include_defaults: bool = True, + base: Type[BaseConfiguration] = BaseConfiguration, +) -> Tuple[Type[BaseConfiguration], Dict[str, Any]]: + """Creates a SPEC on base `base` for a function `f` with signature `sig`. + + All the arguments in `sig` that are valid SPEC hints and have defaults will be part of the SPEC. + Special markers for required SPEC fields `dlt.secrets.value` and `dlt.config.value` are parsed using + module source code, which is a hack and will not work for modules not imported from a file. + + The name of a SPEC type is inferred from the qualname of `f`; the type refers to the module of `f` and is unique + within that module. NOTE: the SPECs are cached in the module by using the name as an id. + + The return value is a tuple of the SPEC and the SPEC fields created from `sig`.
+ """ name = _get_spec_name_from_f(f) module = inspect.getmodule(f) + base_fields = base.get_resolvable_fields() # check if spec for that function exists spec_id = name # f"SPEC_{name}_kw_only_{kw_only}" if hasattr(module, spec_id): - return getattr(module, spec_id) # type: ignore + MOD_SPEC: Type[BaseConfiguration] = getattr(module, spec_id) + return MOD_SPEC, MOD_SPEC.get_resolvable_fields() # find all the arguments that have following defaults literal_defaults: Dict[str, str] = None @@ -59,7 +75,8 @@ def dlt_config_literal_to_type(arg_name: str) -> AnyType: return None # synthesize configuration from the signature - fields: Dict[str, Any] = {} + new_fields: Dict[str, Any] = {} + sig_base_fields: Dict[str, Any] = {} annotations: Dict[str, Any] = {} for p in sig.parameters.values(): @@ -69,6 +86,10 @@ def dlt_config_literal_to_type(arg_name: str) -> AnyType: "cls", ]: field_type = AnyType if p.annotation == Parameter.empty else p.annotation + # keep the base fields if sig not annotated + if p.name in base_fields and field_type is AnyType and p.default is None: + sig_base_fields[p.name] = base_fields[p.name] + continue # only valid hints and parameters with defaults are eligible if is_valid_hint(field_type) and p.default != Parameter.empty: # try to get type from default @@ -99,18 +120,17 @@ def dlt_config_literal_to_type(arg_name: str) -> AnyType: # set annotations annotations[p.name] = field_type # set field with default value - fields[p.name] = p.default + new_fields[p.name] = p.default - if not fields: - return None + signature_fields = {**sig_base_fields, **new_fields} # new type goes to the module where sig was declared - fields["__module__"] = module.__name__ + new_fields["__module__"] = module.__name__ # set annotations so they are present in __dict__ - fields["__annotations__"] = annotations + new_fields["__annotations__"] = annotations # synthesize type - T: Type[BaseConfiguration] = type(name, (BaseConfiguration,), fields) + T: Type[BaseConfiguration] = type(name, (base,), new_fields) SPEC = configspec()(T) # add to the module setattr(module, spec_id, SPEC) - return SPEC + return SPEC, signature_fields diff --git a/dlt/common/schema/exceptions.py b/dlt/common/schema/exceptions.py index 7f73bcbf36..96341ab8b4 100644 --- a/dlt/common/schema/exceptions.py +++ b/dlt/common/schema/exceptions.py @@ -98,7 +98,9 @@ def __init__( self.to_engine = to_engine super().__init__( f"No engine upgrade path in schema {schema_name} from {init_engine} to {to_engine}," - f" stopped at {from_engine}" + f" stopped at {from_engine}. You possibly tried to run an older dlt" + " version against a destination you have previously loaded data to with a newer dlt" + " version." 
) diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py index 4c81c8af72..92598fff44 100644 --- a/dlt/common/schema/schema.py +++ b/dlt/common/schema/schema.py @@ -82,7 +82,9 @@ class Schema: _imported_version_hash: str # version hash of recently imported schema _schema_description: str # optional schema description _schema_tables: TSchemaTables - _settings: TSchemaSettings # schema settings to hold default hints, preferred types and other settings + _settings: ( + TSchemaSettings # schema settings to hold default hints, preferred types and other settings + ) # list of preferred types: map regex on columns into types _compiled_preferred_types: List[Tuple[REPattern, TDataType]] @@ -551,14 +553,20 @@ def get_table_columns( if utils.is_complete_column(v) } - def data_tables(self, include_incomplete: bool = False) -> List[TTableSchema]: + def data_tables( + self, seen_data_only: bool = False, include_incomplete: bool = False + ) -> List[TTableSchema]: """Gets list of all tables, that hold the loaded data. Excludes dlt tables. Excludes incomplete tables (ie. without columns)""" return [ t for t in self._schema_tables.values() if not t["name"].startswith(self._dlt_tables_prefix) and ( - include_incomplete or len(self.get_table_columns(t["name"], include_incomplete)) > 0 + ( + include_incomplete + or len(self.get_table_columns(t["name"], include_incomplete)) > 0 + ) + and (not seen_data_only or utils.has_table_seen_data(t)) ) ] diff --git a/dlt/common/storages/exceptions.py b/dlt/common/storages/exceptions.py index 22d6dfaf79..f4288719c1 100644 --- a/dlt/common/storages/exceptions.py +++ b/dlt/common/storages/exceptions.py @@ -116,3 +116,11 @@ def __init__(self, schema_name: str, storage_path: str, stored_name: str) -> Non f"A schema file name '{schema_name}' in {storage_path} does not correspond to the name" f" of schema in the file {stored_name}" ) + + +class CurrentLoadPackageStateNotAvailable(StorageException): + def __init__(self) -> None: + super().__init__( + "State of the current load package is not available. Current load package state is" + " only available in a function decorated with @dlt.destination during loading." 
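A minimal sketch of how a custom destination might use the load package state helpers introduced in `load_package.py` below. The sink name and the stored keys are hypothetical, and the exact `dlt.destination` decorator signature is not shown in this diff; only `destination_state` and `commit_load_package_state` come from the added code, and they are only usable while a package is being loaded.

```python
import dlt
from dlt.common.storages.load_package import destination_state, commit_load_package_state


@dlt.destination
def my_sink(items, table) -> None:
    # destination_state() returns the private "destination_state" section
    # of the current load package state (raises outside of a load)
    state = destination_state()
    tables_seen = state.setdefault("tables_seen", [])
    if table["name"] not in tables_seen:
        tables_seen.append(table["name"])
    # persist the mutated state back into the load package folder
    commit_load_package_state()
```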
+ ) diff --git a/dlt/common/storages/load_package.py b/dlt/common/storages/load_package.py index 63409aa878..bb66e28671 100644 --- a/dlt/common/storages/load_package.py +++ b/dlt/common/storages/load_package.py @@ -1,6 +1,8 @@ import contextlib import os from copy import deepcopy +import threading + import datetime # noqa: 251 import humanize from pathlib import Path @@ -17,23 +19,92 @@ Set, get_args, cast, + Any, + Tuple, + TYPE_CHECKING, + TypedDict, ) from dlt.common import pendulum, json + +from dlt.common.configuration import configspec +from dlt.common.configuration.specs import ContainerInjectableContext +from dlt.common.configuration.exceptions import ContextDefaultCannotBeCreated +from dlt.common.configuration.container import Container + from dlt.common.data_writers import DataWriter, new_file_id from dlt.common.destination import TLoaderFileFormat from dlt.common.exceptions import TerminalValueError from dlt.common.schema import Schema, TSchemaTables from dlt.common.schema.typing import TStoredSchema, TTableSchemaColumns from dlt.common.storages import FileStorage -from dlt.common.storages.exceptions import LoadPackageNotFound -from dlt.common.typing import DictStrAny, StrAny, SupportsHumanize +from dlt.common.storages.exceptions import LoadPackageNotFound, CurrentLoadPackageStateNotAvailable +from dlt.common.typing import DictStrAny, SupportsHumanize from dlt.common.utils import flatten_list_or_items +from dlt.common.versioned_state import ( + generate_state_version_hash, + bump_state_version_if_modified, + TVersionedState, + default_versioned_state, +) +from typing_extensions import NotRequired + + +class TLoadPackageState(TVersionedState, total=False): + created_at: str + """Timestamp when the loadpackage was created""" + + """A section of state that does not participate in change merging and version control""" + destination_state: NotRequired[Dict[str, Any]] + """private space for destinations to store state relevant only to the load package""" + + +class TLoadPackage(TypedDict, total=False): + load_id: str + """Load id""" + state: TLoadPackageState + """State of the load package""" + + +# allows to upgrade state when restored with a new version of state logic/schema +LOADPACKAGE_STATE_ENGINE_VERSION = 1 + + +def generate_loadpackage_state_version_hash(state: TLoadPackageState) -> str: + return generate_state_version_hash(state) + + +def bump_loadpackage_state_version_if_modified(state: TLoadPackageState) -> Tuple[int, str, str]: + return bump_state_version_if_modified(state) + + +def migrate_load_package_state( + state: DictStrAny, from_engine: int, to_engine: int +) -> TLoadPackageState: + # TODO: if you start adding new versions, we need proper tests for these migrations! 
+ # NOTE: do not touch destinations state, it is not versioned + if from_engine == to_engine: + return cast(TLoadPackageState, state) + + # check state engine + if from_engine != to_engine: + raise Exception("No upgrade path for loadpackage state") + + state["_state_engine_version"] = from_engine + return cast(TLoadPackageState, state) + + +def default_load_package_state() -> TLoadPackageState: + return { + **default_versioned_state(), + "_state_engine_version": LOADPACKAGE_STATE_ENGINE_VERSION, + } + # folders to manage load jobs in a single load package TJobState = Literal["new_jobs", "failed_jobs", "started_jobs", "completed_jobs"] WORKING_FOLDERS: Set[TJobState] = set(get_args(TJobState)) -TLoadPackageState = Literal["new", "extracted", "normalized", "loaded", "aborted"] +TLoadPackageStatus = Literal["new", "extracted", "normalized", "loaded", "aborted"] class ParsedLoadJobFileName(NamedTuple): @@ -125,7 +196,7 @@ def __str__(self) -> str: class _LoadPackageInfo(NamedTuple): load_id: str package_path: str - state: TLoadPackageState + state: TLoadPackageStatus schema: Schema schema_update: TSchemaTables completed_at: datetime.datetime @@ -201,8 +272,11 @@ class PackageStorage: PACKAGE_COMPLETED_FILE_NAME = ( # completed package marker file, currently only to store data with os.stat "package_completed.json" ) + LOAD_PACKAGE_STATE_FILE_NAME = ( # internal state of the load package, will not be synced to the destination + "load_package_state.json" + ) - def __init__(self, storage: FileStorage, initial_state: TLoadPackageState) -> None: + def __init__(self, storage: FileStorage, initial_state: TLoadPackageStatus) -> None: """Creates storage that manages load packages with root at `storage` and initial package state `initial_state`""" self.storage = storage self.initial_state = initial_state @@ -334,8 +408,13 @@ def create_package(self, load_id: str) -> None: self.storage.create_folder(os.path.join(load_id, PackageStorage.COMPLETED_JOBS_FOLDER)) self.storage.create_folder(os.path.join(load_id, PackageStorage.FAILED_JOBS_FOLDER)) self.storage.create_folder(os.path.join(load_id, PackageStorage.STARTED_JOBS_FOLDER)) + # ensure created timestamp is set in state when load package is created + state = self.get_load_package_state(load_id) + if not state.get("created_at"): + state["created_at"] = pendulum.now().to_iso8601_string() + self.save_load_package_state(load_id, state) - def complete_loading_package(self, load_id: str, load_state: TLoadPackageState) -> str: + def complete_loading_package(self, load_id: str, load_state: TLoadPackageStatus) -> str: """Completes loading the package by writing marker file with`package_state. 
Returns path to the completed package""" load_path = self.get_package_path(load_id) # save marker file @@ -381,6 +460,36 @@ def save_schema_updates(self, load_id: str, schema_update: TSchemaTables) -> Non ) as f: json.dump(schema_update, f) + # + # Loadpackage state + # + def get_load_package_state(self, load_id: str) -> TLoadPackageState: + package_path = self.get_package_path(load_id) + if not self.storage.has_folder(package_path): + raise LoadPackageNotFound(load_id) + try: + state_dump = self.storage.load(self.get_load_package_state_path(load_id)) + state = json.loads(state_dump) + return migrate_load_package_state( + state, state["_state_engine_version"], LOADPACKAGE_STATE_ENGINE_VERSION + ) + except FileNotFoundError: + return default_load_package_state() + + def save_load_package_state(self, load_id: str, state: TLoadPackageState) -> None: + package_path = self.get_package_path(load_id) + if not self.storage.has_folder(package_path): + raise LoadPackageNotFound(load_id) + bump_loadpackage_state_version_if_modified(state) + self.storage.save( + self.get_load_package_state_path(load_id), + json.dumps(state), + ) + + def get_load_package_state_path(self, load_id: str) -> str: + package_path = self.get_package_path(load_id) + return os.path.join(package_path, PackageStorage.LOAD_PACKAGE_STATE_FILE_NAME) + # # Get package info # @@ -514,3 +623,59 @@ def filter_jobs_for_table( all_jobs: Iterable[LoadJobInfo], table_name: str ) -> Sequence[LoadJobInfo]: return [job for job in all_jobs if job.job_file_info.table_name == table_name] + + +@configspec +class LoadPackageStateInjectableContext(ContainerInjectableContext): + storage: PackageStorage + load_id: str + can_create_default: ClassVar[bool] = False + global_affinity: ClassVar[bool] = False + + def commit(self) -> None: + with self.state_save_lock: + self.storage.save_load_package_state(self.load_id, self.state) + + def on_resolved(self) -> None: + self.state_save_lock = threading.Lock() + self.state = self.storage.get_load_package_state(self.load_id) + + if TYPE_CHECKING: + + def __init__(self, load_id: str, storage: PackageStorage) -> None: ... + + +def load_package() -> TLoadPackage: + """Get full load package state present in current context. Across all threads this will be the same in memory dict.""" + container = Container() + # get injected state if present. injected load package state is typically "managed" so changes will be persisted + # if you need to save the load package state during a load, you need to call commit_load_package_state + try: + state_ctx = container[LoadPackageStateInjectableContext] + except ContextDefaultCannotBeCreated: + raise CurrentLoadPackageStateNotAvailable() + return TLoadPackage(state=state_ctx.state, load_id=state_ctx.load_id) + + +def commit_load_package_state() -> None: + """Commit load package state present in current context. This is thread safe.""" + container = Container() + try: + state_ctx = container[LoadPackageStateInjectableContext] + except ContextDefaultCannotBeCreated: + raise CurrentLoadPackageStateNotAvailable() + state_ctx.commit() + + +def destination_state() -> DictStrAny: + """Get segment of load package state that is specific to the current destination.""" + lp = load_package() + return lp["state"].setdefault("destination_state", {}) + + +def clear_destination_state(commit: bool = True) -> None: + """Clear segment of load package state that is specific to the current destination. 
Optionally commit to load package.""" + lp = load_package() + lp["state"].pop("destination_state", None) + if commit: + commit_load_package_state() diff --git a/dlt/common/storages/load_storage.py b/dlt/common/storages/load_storage.py index a83502cb9b..ffd55e7f29 100644 --- a/dlt/common/storages/load_storage.py +++ b/dlt/common/storages/load_storage.py @@ -1,6 +1,7 @@ from os.path import join from typing import Iterable, Optional, Sequence +from dlt.common.typing import DictStrAny from dlt.common import json from dlt.common.configuration import known_sections from dlt.common.configuration.inject import with_config @@ -18,6 +19,7 @@ PackageStorage, ParsedLoadJobFileName, TJobState, + TLoadPackageState, ) from dlt.common.storages.exceptions import JobWithUnsupportedWriterException, LoadPackageNotFound @@ -38,6 +40,11 @@ def __init__( supported_file_formats: Iterable[TLoaderFileFormat], config: LoadStorageConfiguration = config.value, ) -> None: + # puae-jsonl jobs have the extension .jsonl, so cater for this here + if supported_file_formats and "puae-jsonl" in supported_file_formats: + supported_file_formats = list(supported_file_formats) + supported_file_formats.append("jsonl") + if not LoadStorage.ALL_SUPPORTED_FILE_FORMATS.issuperset(supported_file_formats): raise TerminalValueError(supported_file_formats) if preferred_file_format and preferred_file_format not in supported_file_formats: @@ -79,7 +86,7 @@ def _get_data_item_path_template(self, load_id: str, _: str, table_name: str) -> def list_new_jobs(self, load_id: str) -> Sequence[str]: """Lists all jobs in new jobs folder of normalized package storage and checks if file formats are supported""" new_jobs = self.normalized_packages.list_new_jobs(load_id) - # # make sure all jobs have supported writers + # make sure all jobs have supported writers wrong_job = next( ( j @@ -184,3 +191,10 @@ def get_load_package_info(self, load_id: str) -> LoadPackageInfo: return self.loaded_packages.get_load_package_info(load_id) except LoadPackageNotFound: return self.normalized_packages.get_load_package_info(load_id) + + def get_load_package_state(self, load_id: str) -> TLoadPackageState: + """Gets state of normlized or loaded package with given load_id, all jobs and their statuses.""" + try: + return self.loaded_packages.get_load_package_state(load_id) + except LoadPackageNotFound: + return self.normalized_packages.get_load_package_state(load_id) diff --git a/dlt/common/storages/normalize_storage.py b/dlt/common/storages/normalize_storage.py index 8a247c2021..2b90b7c088 100644 --- a/dlt/common/storages/normalize_storage.py +++ b/dlt/common/storages/normalize_storage.py @@ -51,7 +51,9 @@ def list_files_to_normalize_sorted(self) -> Sequence[str]: [ file for file in files - if not file.endswith(PackageStorage.SCHEMA_FILE_NAME) and os.path.isfile(file) + if not file.endswith(PackageStorage.SCHEMA_FILE_NAME) + and os.path.isfile(file) + and not file.endswith(PackageStorage.LOAD_PACKAGE_STATE_FILE_NAME) ] ) diff --git a/dlt/common/validation.py b/dlt/common/validation.py index 6bf1356aeb..4b54d6a29e 100644 --- a/dlt/common/validation.py +++ b/dlt/common/validation.py @@ -1,5 +1,6 @@ import contextlib import functools +import inspect from typing import Callable, Any, Type from typing_extensions import get_type_hints, get_args @@ -38,11 +39,10 @@ def validate_dict( filter_f (TFilterFunc, optional): A function to filter keys in `doc`. It should return `True` for keys to be kept. Defaults to a function that keeps all keys. 
validator_f (TCustomValidator, optional): A function to perform additional validation - for types not covered by this function. It should return `True` if the validation passes. + for types not covered by this function. It should return `True` if the validation passes + or raise DictValidationException on validation error. For types it cannot validate, it + should return False to allow chaining. Defaults to a function that rejects all such types. - filter_required (TFilterFunc, optional): A function to filter out required fields, useful - for testing historic versions of dict that might now have certain fields yet. - Raises: DictValidationException: If there are missing required fields, unexpected fields, type mismatches or unvalidated types in `doc` compared to `spec`. @@ -162,8 +162,23 @@ def verify_prop(pk: str, pv: Any, t: Any) -> None: elif t is Any: # pass everything with any type pass + elif inspect.isclass(t) and isinstance(pv, t): + # allow instances of classes + pass else: + type_name = getattr(t, "__name__", str(t)) + pv_type_name = getattr(type(pv), "__name__", str(type(pv))) + # try to apply special validator if not validator_f(path, pk, pv, t): + # type `t` cannot be validated by validator_f + if inspect.isclass(t): + if not isinstance(pv, t): + raise DictValidationException( + f"In {path}: field {pk} expect class {type_name} but got instance of" + f" {pv_type_name}", + path, + pk, + ) # TODO: when Python 3.9 and earlier support is # dropped, just __name__ can be used type_name = getattr(t, "__name__", str(t)) diff --git a/dlt/common/versioned_state.py b/dlt/common/versioned_state.py new file mode 100644 index 0000000000..a051a6660c --- /dev/null +++ b/dlt/common/versioned_state.py @@ -0,0 +1,45 @@ +import base64 +import hashlib +from copy import copy + +import datetime # noqa: 251 +from dlt.common import json +from typing import TypedDict, Dict, Any, List, Tuple, cast + + +class TVersionedState(TypedDict, total=False): + _state_version: int + _version_hash: str + _state_engine_version: int + + +def generate_state_version_hash(state: TVersionedState, exclude_attrs: List[str] = None) -> str: + # generates hash out of stored schema content, excluding hash itself, version and local state + state_copy = copy(state) + exclude_attrs = exclude_attrs or [] + exclude_attrs.extend(["_state_version", "_state_engine_version", "_version_hash"]) + for attr in exclude_attrs: + state_copy.pop(attr, None) # type: ignore + content = json.typed_dumpb(state_copy, sort_keys=True) # type: ignore + h = hashlib.sha3_256(content) + return base64.b64encode(h.digest()).decode("ascii") + + +def bump_state_version_if_modified( + state: TVersionedState, exclude_attrs: List[str] = None +) -> Tuple[int, str, str]: + """Bumps the `state` version and version hash if content modified, returns (new version, new hash, old hash) tuple""" + hash_ = generate_state_version_hash(state, exclude_attrs) + previous_hash = state.get("_version_hash") + if not previous_hash: + # if hash was not set, set it without bumping the version, that's the initial state + pass + elif hash_ != previous_hash: + state["_state_version"] += 1 + + state["_version_hash"] = hash_ + return state["_state_version"], hash_, previous_hash + + +def default_versioned_state() -> TVersionedState: + return {"_state_version": 0, "_state_engine_version": 1} diff --git a/dlt/destinations/__init__.py b/dlt/destinations/__init__.py index c0a0b419c1..4a10deffc0 100644 --- a/dlt/destinations/__init__.py +++ b/dlt/destinations/__init__.py @@ -10,6 +10,7 @@ 
from dlt.destinations.impl.qdrant.factory import qdrant from dlt.destinations.impl.motherduck.factory import motherduck from dlt.destinations.impl.weaviate.factory import weaviate +from dlt.destinations.impl.destination.factory import destination from dlt.destinations.impl.synapse.factory import synapse from dlt.destinations.impl.databricks.factory import databricks @@ -29,4 +30,5 @@ "weaviate", "synapse", "databricks", + "destination", ] diff --git a/dlt/destinations/decorators.py b/dlt/destinations/decorators.py new file mode 100644 index 0000000000..62d059c4a6 --- /dev/null +++ b/dlt/destinations/decorators.py @@ -0,0 +1,96 @@ +import functools + +from typing import Any, Type, Optional, Callable, Union, cast +from typing_extensions import Concatenate +from dlt.common.typing import AnyFun + +from functools import wraps + +from dlt.common import logger +from dlt.destinations.impl.destination.factory import destination as _destination +from dlt.destinations.impl.destination.configuration import ( + TDestinationCallableParams, + CustomDestinationClientConfiguration, +) +from dlt.common.destination import TLoaderFileFormat +from dlt.common.destination.reference import Destination +from dlt.common.typing import TDataItems +from dlt.common.schema import TTableSchema + + +def destination( + func: Optional[AnyFun] = None, + /, + loader_file_format: TLoaderFileFormat = None, + batch_size: int = 10, + name: str = None, + naming_convention: str = "direct", + skip_dlt_columns_and_tables: bool = True, + max_table_nesting: int = 0, + spec: Type[CustomDestinationClientConfiguration] = None, +) -> Callable[ + [Callable[Concatenate[Union[TDataItems, str], TTableSchema, TDestinationCallableParams], Any]], + Callable[TDestinationCallableParams, _destination], +]: + """A decorator that transforms a function that takes two positional arguments "table" and "items" and any number of keyword arguments with defaults + into a callable that will create a custom destination. The function does not return anything, the keyword arguments can be configuration and secrets values. + + #### Example Usage with Configuration and Secrets: + + >>> @dlt.destination(batch_size=100, loader_file_format="parquet") + >>> def my_destination(items, table, api_url: str = dlt.config.value, api_secret = dlt.secrets.value): + >>> print(table["name"]) + >>> print(items) + >>> + >>> p = dlt.pipeline("chess_pipeline", destination=my_destination) + + Here all incoming data will be sent to the destination function with the items in the requested format and the dlt table schema. + The config and secret values will be resolved from the path destination.my_destination.api_url and destination.my_destination.api_secret. + + #### Args: + batch_size: defines how many items per function call are batched together and sent as an array. If you set a batch-size of 0, instead of passing in actual dataitems, you will receive one call per load job with the path of the file as the items argument. You can then open and process that file in any way you like. + loader_file_format: defines in which format files are stored in the load package before being sent to the destination function, this can be puae-jsonl or parquet. + name: defines the name of the destination that get's created by the destination decorator, defaults to the name of the function + naming_convention: defines the name of the destination that gets created by the destination decorator. This controls how table and column names are normalized. 
The default is direct, which will keep all names the same. + max_table_nesting: defines how deep the normalizer will go to normalize complex fields on your data to create subtables. This overwrites any settings on your source and is set to zero to not create any nested tables by default. + skip_dlt_columns_and_tables: defines whether internal tables and columns will be fed into the custom destination function. This is set to True by default. + spec: defines a configuration spec that will be used to inject arguments into the decorated functions. Arguments not in the spec will not be injected. + + Returns: + A callable that can be used to create a dlt custom destination instance + """ + + def decorator( + destination_callable: Callable[ + Concatenate[Union[TDataItems, str], TTableSchema, TDestinationCallableParams], Any + ] + ) -> Callable[TDestinationCallableParams, _destination]: + @wraps(destination_callable) + def wrapper( + *args: TDestinationCallableParams.args, **kwargs: TDestinationCallableParams.kwargs + ) -> _destination: + if args: + logger.warning( + "Ignoring positional arguments for destination callable %s", + destination_callable, + ) + return _destination( + spec=spec, + destination_callable=destination_callable, + loader_file_format=loader_file_format, + batch_size=batch_size, + destination_name=name, + naming_convention=naming_convention, + skip_dlt_columns_and_tables=skip_dlt_columns_and_tables, + max_table_nesting=max_table_nesting, + **kwargs, # type: ignore + ) + + return wrapper + + if func is None: + # we're called with parens. + return decorator + + # we're called as @destination without parens. + return decorator(func) # type: ignore diff --git a/dlt/destinations/impl/athena/athena.py b/dlt/destinations/impl/athena/athena.py index 9d79d8bf55..b323832418 100644 --- a/dlt/destinations/impl/athena/athena.py +++ b/dlt/destinations/impl/athena/athena.py @@ -37,7 +37,7 @@ from dlt.common.schema.typing import TTableSchema, TColumnType, TWriteDisposition, TTableFormat from dlt.common.schema.utils import table_schema_has_type, get_table_format from dlt.common.destination import DestinationCapabilitiesContext -from dlt.common.destination.reference import LoadJob, FollowupJob +from dlt.common.destination.reference import LoadJob, DoNothingFollowupJob, DoNothingJob from dlt.common.destination.reference import TLoadJobState, NewLoadJob, SupportsStagingDestination from dlt.common.storages import FileStorage from dlt.common.data_writers.escape import escape_bigquery_identifier @@ -149,27 +149,6 @@ def __init__(self) -> None: DLTAthenaFormatter._INSTANCE = self -class DoNothingJob(LoadJob): - """The most lazy class of dlt""" - - def __init__(self, file_path: str) -> None: - super().__init__(FileStorage.get_file_name_from_file_path(file_path)) - - def state(self) -> TLoadJobState: - # this job is always done - return "completed" - - def exception(self) -> str: - # this part of code should be never reached - raise NotImplementedError() - - -class DoNothingFollowupJob(DoNothingJob, FollowupJob): - """The second most lazy class of dlt""" - - pass - - class AthenaSQLClient(SqlClientBase[Connection]): capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() dbapi: ClassVar[DBApi] = pyathena diff --git a/dlt/destinations/impl/destination/__init__.py b/dlt/destinations/impl/destination/__init__.py new file mode 100644 index 0000000000..560c9d4eda --- /dev/null +++ b/dlt/destinations/impl/destination/__init__.py @@ -0,0 +1,17 @@ +from typing import Optional +from dlt.common.destination
import DestinationCapabilitiesContext +from dlt.common.data_writers import TLoaderFileFormat + + +def capabilities( + preferred_loader_file_format: TLoaderFileFormat = "puae-jsonl", + naming_convention: str = "direct", + max_table_nesting: Optional[int] = 0, +) -> DestinationCapabilitiesContext: + caps = DestinationCapabilitiesContext.generic_capabilities(preferred_loader_file_format) + caps.supported_loader_file_formats = ["puae-jsonl", "parquet"] + caps.supports_ddl_transactions = False + caps.supports_transactions = False + caps.naming_convention = naming_convention + caps.max_table_nesting = max_table_nesting + return caps diff --git a/dlt/destinations/impl/destination/configuration.py b/dlt/destinations/impl/destination/configuration.py new file mode 100644 index 0000000000..f123ba69b3 --- /dev/null +++ b/dlt/destinations/impl/destination/configuration.py @@ -0,0 +1,34 @@ +from typing import TYPE_CHECKING, Optional, Final, Callable, Union, Any +from typing_extensions import ParamSpec + +from dlt.common.configuration import configspec +from dlt.common.destination import TLoaderFileFormat +from dlt.common.destination.reference import ( + DestinationClientConfiguration, +) +from dlt.common.typing import TDataItems +from dlt.common.schema import TTableSchema +from dlt.common.destination import Destination + +TDestinationCallable = Callable[[Union[TDataItems, str], TTableSchema], None] +TDestinationCallableParams = ParamSpec("TDestinationCallableParams") + + +@configspec +class CustomDestinationClientConfiguration(DestinationClientConfiguration): + destination_type: Final[str] = "destination" # type: ignore + destination_callable: Optional[Union[str, TDestinationCallable]] = None # noqa: A003 + loader_file_format: TLoaderFileFormat = "puae-jsonl" + batch_size: int = 10 + skip_dlt_columns_and_tables: bool = True + max_table_nesting: int = 0 + + if TYPE_CHECKING: + + def __init__( + self, + *, + loader_file_format: TLoaderFileFormat = "puae-jsonl", + batch_size: int = 10, + destination_callable: Union[TDestinationCallable, str] = None, + ) -> None: ... 
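The capabilities and configuration spec above are what the @dlt.destination decorator from dlt/destinations/decorators.py (earlier in this diff) resolves against. A minimal usage sketch, assuming a local JSONL file as the sink target; the file prefix, pipeline name and table name are illustrative, while the decorator arguments and the (items, table) callable signature come from the decorator docstring:

import json as _json

import dlt
from dlt.common.typing import TDataItems
from dlt.common.schema import TTableSchema


@dlt.destination(batch_size=25, loader_file_format="puae-jsonl", name="local_sink")
def local_sink(items: TDataItems, table: TTableSchema, prefix: str = "./out") -> None:
    # each call receives at most `batch_size` rows plus the dlt table schema;
    # `prefix` could instead default to dlt.config.value so it is resolved from
    # destination.local_sink.prefix, as described in the decorator docstring
    with open(f"{prefix}_{table['name']}.jsonl", "a", encoding="utf-8") as f:
        for row in items:
            f.write(_json.dumps(row, default=str) + "\n")


pipeline = dlt.pipeline("local_sink_pipeline", destination=local_sink)
pipeline.run([{"id": 1}, {"id": 2}], table_name="events")

With batch_size=0 the callable would instead receive the path of each load-package file, which is the mode DestinationLoadJob (next file in this diff) uses to hand over whole files.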
diff --git a/dlt/destinations/impl/destination/destination.py b/dlt/destinations/impl/destination/destination.py new file mode 100644 index 0000000000..4a3cabde34 --- /dev/null +++ b/dlt/destinations/impl/destination/destination.py @@ -0,0 +1,212 @@ +from abc import ABC, abstractmethod +from types import TracebackType +from typing import ClassVar, Dict, Optional, Type, Iterable, Iterable, cast, Dict, List +from copy import deepcopy + +from dlt.common.destination.reference import LoadJob +from dlt.destinations.job_impl import EmptyLoadJob +from dlt.common.typing import TDataItems, AnyFun +from dlt.common import json +from dlt.pipeline.current import ( + destination_state, + commit_load_package_state, +) +from dlt.common.configuration import create_resolved_partial + +from dlt.common.schema import Schema, TTableSchema, TSchemaTables +from dlt.common.schema.typing import TTableSchema +from dlt.common.storages import FileStorage +from dlt.common.destination import DestinationCapabilitiesContext +from dlt.common.destination.reference import ( + TLoadJobState, + LoadJob, + DoNothingJob, + JobClientBase, +) + +from dlt.destinations.impl.destination import capabilities +from dlt.destinations.impl.destination.configuration import ( + CustomDestinationClientConfiguration, + TDestinationCallable, +) + + +class DestinationLoadJob(LoadJob, ABC): + def __init__( + self, + table: TTableSchema, + file_path: str, + config: CustomDestinationClientConfiguration, + schema: Schema, + destination_state: Dict[str, int], + destination_callable: TDestinationCallable, + skipped_columns: List[str], + ) -> None: + super().__init__(FileStorage.get_file_name_from_file_path(file_path)) + self._file_path = file_path + self._config = config + self._table = table + self._schema = schema + # we create pre_resolved callable here + self._callable = destination_callable + self._state: TLoadJobState = "running" + self._storage_id = f"{self._parsed_file_name.table_name}.{self._parsed_file_name.file_id}" + self.skipped_columns = skipped_columns + try: + if self._config.batch_size == 0: + # on batch size zero we only call the callable with the filename + self.call_callable_with_items(self._file_path) + else: + current_index = destination_state.get(self._storage_id, 0) + for batch in self.run(current_index): + self.call_callable_with_items(batch) + current_index += len(batch) + destination_state[self._storage_id] = current_index + + self._state = "completed" + except Exception as e: + self._state = "retry" + raise e + finally: + # save progress + commit_load_package_state() + + @abstractmethod + def run(self, start_index: int) -> Iterable[TDataItems]: + pass + + def call_callable_with_items(self, items: TDataItems) -> None: + if not items: + return + # call callable + self._callable(items, self._table) + + def state(self) -> TLoadJobState: + return self._state + + def exception(self) -> str: + raise NotImplementedError() + + +class DestinationParquetLoadJob(DestinationLoadJob): + def run(self, start_index: int) -> Iterable[TDataItems]: + # stream items + from dlt.common.libs.pyarrow import pyarrow + + # guard against changed batch size after restart of loadjob + assert ( + start_index % self._config.batch_size + ) == 0, "Batch size was changed during processing of one load package" + + # on record batches we cannot drop columns, we need to + # select the ones we want to keep + keep_columns = list(self._table["columns"].keys()) + start_batch = start_index / self._config.batch_size + with pyarrow.parquet.ParquetFile(self._file_path) 
as reader: + for record_batch in reader.iter_batches( + batch_size=self._config.batch_size, columns=keep_columns + ): + if start_batch > 0: + start_batch -= 1 + continue + yield record_batch + + +class DestinationJsonlLoadJob(DestinationLoadJob): + def run(self, start_index: int) -> Iterable[TDataItems]: + current_batch: TDataItems = [] + + # stream items + with FileStorage.open_zipsafe_ro(self._file_path) as f: + encoded_json = json.typed_loads(f.read()) + + for item in encoded_json: + # find correct start position + if start_index > 0: + start_index -= 1 + continue + # skip internal columns + for column in self.skipped_columns: + item.pop(column, None) + current_batch.append(item) + if len(current_batch) == self._config.batch_size: + yield current_batch + current_batch = [] + yield current_batch + + +class DestinationClient(JobClientBase): + """Sink Client""" + + capabilities: ClassVar[DestinationCapabilitiesContext] = capabilities() + + def __init__(self, schema: Schema, config: CustomDestinationClientConfiguration) -> None: + super().__init__(schema, config) + self.config: CustomDestinationClientConfiguration = config + # create pre-resolved callable to avoid multiple config resolutions during execution of the jobs + self.destination_callable = create_resolved_partial( + cast(AnyFun, self.config.destination_callable), self.config + ) + + def initialize_storage(self, truncate_tables: Iterable[str] = None) -> None: + pass + + def is_storage_initialized(self) -> bool: + return True + + def drop_storage(self) -> None: + pass + + def update_stored_schema( + self, only_tables: Iterable[str] = None, expected_update: TSchemaTables = None + ) -> Optional[TSchemaTables]: + return super().update_stored_schema(only_tables, expected_update) + + def start_file_load(self, table: TTableSchema, file_path: str, load_id: str) -> LoadJob: + # skip internal tables and remove columns from schema if so configured + skipped_columns: List[str] = [] + if self.config.skip_dlt_columns_and_tables: + if table["name"].startswith(self.schema._dlt_tables_prefix): + return DoNothingJob(file_path) + table = deepcopy(table) + for column in list(table["columns"].keys()): + if column.startswith(self.schema._dlt_tables_prefix): + table["columns"].pop(column) + skipped_columns.append(column) + + # save our state in destination name scope + load_state = destination_state() + if file_path.endswith("parquet"): + return DestinationParquetLoadJob( + table, + file_path, + self.config, + self.schema, + load_state, + self.destination_callable, + skipped_columns, + ) + if file_path.endswith("jsonl"): + return DestinationJsonlLoadJob( + table, + file_path, + self.config, + self.schema, + load_state, + self.destination_callable, + skipped_columns, + ) + return None + + def restore_file_load(self, file_path: str) -> LoadJob: + return EmptyLoadJob.from_file_path(file_path, "completed") + + def complete_load(self, load_id: str) -> None: ... 
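DestinationLoadJob above checkpoints its progress in the load package's destination state under a "{table_name}.{file_id}" key and commits after every delivered batch, so a retried job skips work it already handed to the callable. A standalone sketch of that bookkeeping, using assumed names rather than the dlt classes themselves:

from typing import Dict, Iterable, List


def replay_batches(
    rows: List[dict], batch_size: int, state: Dict[str, int], storage_id: str
) -> Iterable[List[dict]]:
    # resume from the last committed position for this file
    start_index = state.get(storage_id, 0)
    for i in range(start_index, len(rows), batch_size):
        batch = rows[i : i + batch_size]
        yield batch
        # commit progress only after the batch was successfully delivered
        state[storage_id] = i + len(batch)


state: Dict[str, int] = {}
rows = [{"id": n} for n in range(10)]
# first attempt delivers two batches of four rows, then "fails"
first_attempt = replay_batches(rows, 4, state, "events.0001")
next(first_attempt)
next(first_attempt)
# the retry resumes at index 8 instead of re-sending rows 0-7
assert list(replay_batches(rows, 4, state, "events.0001")) == [[{"id": 8}, {"id": 9}]]

The assertion in DestinationParquetLoadJob.run (start_index % batch_size == 0) guards exactly this scheme: if the batch size changes between attempts, the saved index no longer lines up with whole batches.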
+ + def __enter__(self) -> "DestinationClient": + return self + + def __exit__( + self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType + ) -> None: + pass diff --git a/dlt/destinations/impl/destination/factory.py b/dlt/destinations/impl/destination/factory.py new file mode 100644 index 0000000000..7cca8f2202 --- /dev/null +++ b/dlt/destinations/impl/destination/factory.py @@ -0,0 +1,144 @@ +import typing as t +import inspect +from importlib import import_module + +from types import ModuleType +from dlt.common.typing import AnyFun + +from dlt.common.destination import Destination, DestinationCapabilitiesContext +from dlt.destinations.exceptions import DestinationTransientException +from dlt.common.configuration import known_sections, with_config, get_fun_spec +from dlt.common.configuration.exceptions import ConfigurationValueError +from dlt.common import logger + +from dlt.destinations.impl.destination.configuration import ( + CustomDestinationClientConfiguration, + TDestinationCallable, +) +from dlt.destinations.impl.destination import capabilities +from dlt.common.data_writers import TLoaderFileFormat +from dlt.common.utils import get_callable_name, is_inner_callable + +if t.TYPE_CHECKING: + from dlt.destinations.impl.destination.destination import DestinationClient + + +class DestinationInfo(t.NamedTuple): + """Runtime information on a discovered destination""" + + SPEC: t.Type[CustomDestinationClientConfiguration] + f: AnyFun + module: ModuleType + + +_DESTINATIONS: t.Dict[str, DestinationInfo] = {} +"""A registry of all the decorated destinations""" + + +class destination(Destination[CustomDestinationClientConfiguration, "DestinationClient"]): + def capabilities(self) -> DestinationCapabilitiesContext: + return capabilities( + preferred_loader_file_format=self.config_params.get("loader_file_format", "puae-jsonl"), + naming_convention=self.config_params.get("naming_convention", "direct"), + max_table_nesting=self.config_params.get("max_table_nesting", None), + ) + + @property + def spec(self) -> t.Type[CustomDestinationClientConfiguration]: + """A spec of destination configuration resolved from the sink function signature""" + return self._spec + + @property + def client_class(self) -> t.Type["DestinationClient"]: + from dlt.destinations.impl.destination.destination import DestinationClient + + return DestinationClient + + def __init__( + self, + destination_callable: t.Union[TDestinationCallable, str] = None, # noqa: A003 + destination_name: t.Optional[str] = None, + environment: t.Optional[str] = None, + loader_file_format: TLoaderFileFormat = None, + batch_size: int = 10, + naming_convention: str = "direct", + spec: t.Type[CustomDestinationClientConfiguration] = None, + **kwargs: t.Any, + ) -> None: + if spec and not issubclass(spec, CustomDestinationClientConfiguration): + raise ValueError( + "A SPEC for a sink destination must use CustomDestinationClientConfiguration as a" + " base." 
+ ) + # resolve callable + if callable(destination_callable): + pass + elif destination_callable: + try: + module_path, attr_name = destination_callable.rsplit(".", 1) + dest_module = import_module(module_path) + except ModuleNotFoundError as e: + raise ConfigurationValueError( + f"Could not find callable module at {module_path}" + ) from e + try: + destination_callable = getattr(dest_module, attr_name) + except AttributeError as e: + raise ConfigurationValueError( + f"Could not find callable function at {destination_callable}" + ) from e + + # provide dummy callable for cases where no callable is provided + # this is needed for cli commands to work + if not destination_callable: + logger.warning( + "No destination callable provided, providing dummy callable which will fail on" + " load." + ) + + def dummy_callable(*args: t.Any, **kwargs: t.Any) -> None: + raise DestinationTransientException( + "You tried to load to a custom destination without a valid callable." + ) + + destination_callable = dummy_callable + + elif not callable(destination_callable): + raise ConfigurationValueError("Resolved Sink destination callable is not a callable.") + + # resolve destination name + if destination_name is None: + destination_name = get_callable_name(destination_callable) + func_module = inspect.getmodule(destination_callable) + + # build destination spec + destination_sections = (known_sections.DESTINATION, destination_name) + conf_callable = with_config( + destination_callable, + spec=spec, + sections=destination_sections, + include_defaults=True, + base=None if spec else CustomDestinationClientConfiguration, + ) + + # save destination in registry + resolved_spec = t.cast( + t.Type[CustomDestinationClientConfiguration], get_fun_spec(conf_callable) + ) + # register only standalone destinations, no inner + if not is_inner_callable(destination_callable): + _DESTINATIONS[destination_callable.__qualname__] = DestinationInfo( + resolved_spec, destination_callable, func_module + ) + + # remember spec + self._spec = resolved_spec or spec + super().__init__( + destination_name=destination_name, + environment=environment, + loader_file_format=loader_file_format, + batch_size=batch_size, + naming_convention=naming_convention, + destination_callable=conf_callable, + **kwargs, + ) diff --git a/dlt/destinations/sql_client.py b/dlt/destinations/sql_client.py index 695f1a0972..9d872a238e 100644 --- a/dlt/destinations/sql_client.py +++ b/dlt/destinations/sql_client.py @@ -221,7 +221,7 @@ def _get_columns(self) -> List[str]: return [c[0] for c in self.native_cursor.description] def df(self, chunk_size: int = None, **kwargs: Any) -> Optional[DataFrame]: - from dlt.common.libs.pandas import _wrap_result + from dlt.common.libs.pandas_sql import _wrap_result columns = self._get_columns() if chunk_size is None: diff --git a/dlt/extract/__init__.py b/dlt/extract/__init__.py index 78e246cd46..03b2e59539 100644 --- a/dlt/extract/__init__.py +++ b/dlt/extract/__init__.py @@ -4,6 +4,7 @@ from dlt.extract.decorators import source, resource, transformer, defer from dlt.extract.incremental import Incremental from dlt.extract.wrappers import wrap_additional_type +from dlt.extract.extractors import materialize_schema_item __all__ = [ "DltResource", @@ -17,4 +18,5 @@ "defer", "Incremental", "wrap_additional_type", + "materialize_schema_item", ] diff --git a/dlt/extract/extract.py b/dlt/extract/extract.py index 2ff813a2de..3b3d0704ea 100644 --- a/dlt/extract/extract.py +++ b/dlt/extract/extract.py @@ -244,6 +244,48 @@ def 
_compute_metrics(self, load_id: str, source: DltSource) -> ExtractMetrics: "hints": clean_hints, } + def _write_empty_files( + self, source: DltSource, extractors: Dict[TLoaderFileFormat, Extractor] + ) -> None: + schema = source.schema + json_extractor = extractors["puae-jsonl"] + resources_with_items = set().union(*[e.resources_with_items for e in extractors.values()]) + # find REPLACE resources that did not yield any pipe items and create empty jobs for them + # NOTE: do not include tables that have never seen data + data_tables = {t["name"]: t for t in schema.data_tables(seen_data_only=True)} + tables_by_resources = utils.group_tables_by_resource(data_tables) + for resource in source.resources.selected.values(): + if resource.write_disposition != "replace" or resource.name in resources_with_items: + continue + if resource.name not in tables_by_resources: + continue + for table in tables_by_resources[resource.name]: + # we only need to write empty files for the top tables + if not table.get("parent", None): + json_extractor.write_empty_items_file(table["name"]) + + # collect resources that received empty materialized lists and had no items + resources_with_empty = ( + set() + .union(*[e.resources_with_empty for e in extractors.values()]) + .difference(resources_with_items) + ) + # get all possible tables + data_tables = {t["name"]: t for t in schema.data_tables()} + tables_by_resources = utils.group_tables_by_resource(data_tables) + for resource_name in resources_with_empty: + if resource := source.resources.selected.get(resource_name): + if tables := tables_by_resources.get(resource_name): + # write empty tables + for table in tables: + # we only need to write empty files for the top tables + if not table.get("parent", None): + json_extractor.write_empty_items_file(table["name"]) + else: + table_name = json_extractor._get_static_table_name(resource, None) + if table_name: + json_extractor.write_empty_items_file(table_name) + def _extract_single_source( self, load_id: str, @@ -255,14 +297,11 @@ def _extract_single_source( ) -> None: schema = source.schema collector = self.collector - resources_with_items: Set[str] = set() extractors: Dict[TLoaderFileFormat, Extractor] = { "puae-jsonl": JsonLExtractor( - load_id, self.extract_storage, schema, resources_with_items, collector=collector - ), - "arrow": ArrowExtractor( - load_id, self.extract_storage, schema, resources_with_items, collector=collector + load_id, self.extract_storage, schema, collector=collector ), + "arrow": ArrowExtractor(load_id, self.extract_storage, schema, collector=collector), } last_item_format: Optional[TLoaderFileFormat] = None @@ -294,23 +333,7 @@ def _extract_single_source( extractors[item_format].write_items(resource, pipe_item.item, pipe_item.meta) last_item_format = item_format - # find defined resources that did not yield any pipeitems and create empty jobs for them - # NOTE: do not include incomplete tables.
those tables have never seen data so we do not need to reset them - data_tables = {t["name"]: t for t in schema.data_tables(include_incomplete=False)} - tables_by_resources = utils.group_tables_by_resource(data_tables) - for resource in source.resources.selected.values(): - if ( - resource.write_disposition != "replace" - or resource.name in resources_with_items - ): - continue - if resource.name not in tables_by_resources: - continue - for table in tables_by_resources[resource.name]: - # we only need to write empty files for the top tables - if not table.get("parent", None): - extractors["puae-jsonl"].write_empty_items_file(table["name"]) - + self._write_empty_files(source, extractors) if left_gens > 0: # go to 100% collector.update("Resources", left_gens) diff --git a/dlt/extract/extractors.py b/dlt/extract/extractors.py index 84abb4f3a8..52ecd66920 100644 --- a/dlt/extract/extractors.py +++ b/dlt/extract/extractors.py @@ -1,5 +1,5 @@ from copy import copy -from typing import Set, Dict, Any, Optional, Set +from typing import Set, Dict, Any, Optional, List from dlt.common import logger from dlt.common.configuration.inject import with_config @@ -29,13 +29,25 @@ from dlt.common.libs.pyarrow import pyarrow as pa, TAnyArrowItem except MissingDependencyException: pyarrow = None + pa = None try: - from dlt.common.libs.pandas import pandas + from dlt.common.libs.pandas import pandas, pandas_to_arrow except MissingDependencyException: pandas = None +class MaterializedEmptyList(List[Any]): + """A list variant that will materialize tables even if empty list was yielded""" + + pass + + +def materialize_schema_item() -> MaterializedEmptyList: + """Yield this to materialize schema in the destination, even if there's no data.""" + return MaterializedEmptyList() + + class Extractor: file_format: TLoaderFileFormat @@ -49,7 +61,6 @@ def __init__( load_id: str, storage: ExtractStorage, schema: Schema, - resources_with_items: Set[str], collector: Collector = NULL_COLLECTOR, *, _caps: DestinationCapabilitiesContext = None, @@ -57,7 +68,10 @@ def __init__( self.schema = schema self.naming = schema.naming self.collector = collector - self.resources_with_items = resources_with_items + self.resources_with_items: Set[str] = set() + """Tracks resources that received items""" + self.resources_with_empty: Set[str] = set() + """Track resources that received empty materialized list""" self.load_id = load_id self._table_contracts: Dict[str, TSchemaContractDict] = {} self._filtered_tables: Set[str] = set() @@ -130,6 +144,9 @@ def _write_item( self.collector.update(table_name, inc=new_rows_count) if new_rows_count > 0: self.resources_with_items.add(resource_name) + else: + if isinstance(items, MaterializedEmptyList): + self.resources_with_empty.add(resource_name) def _write_to_dynamic_table(self, resource: DltResource, items: TDataItems) -> None: if not isinstance(items, list): @@ -224,7 +241,7 @@ def write_items(self, resource: DltResource, items: TDataItems, meta: Any) -> No for tbl in ( ( # 1. 
Convert pandas frame(s) to arrow Table - pa.Table.from_pandas(item) + pandas_to_arrow(item) if (pandas and isinstance(item, pandas.DataFrame)) else item ) @@ -295,7 +312,6 @@ def _compute_table(self, resource: DltResource, items: TDataItems) -> TPartialTa # issue warnings when overriding computed with arrow for col_name, column in arrow_table["columns"].items(): if src_column := computed_table["columns"].get(col_name): - print(src_column) for hint_name, hint in column.items(): if (src_hint := src_column.get(hint_name)) is not None: if src_hint != hint: diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index f298e414a1..54ce00a806 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -82,6 +82,8 @@ def make_hints( ) if not table_name: new_template.pop("name") + if not write_disposition and "write_disposition" in new_template: + new_template.pop("write_disposition") # remember original columns if columns is not None: new_template["original_columns"] = columns @@ -197,10 +199,11 @@ def apply_hints( """ if not self._hints: # if there is no template yet, create and set a new one. + default_wd = None if parent_table_name else DEFAULT_WRITE_DISPOSITION t = make_hints( table_name, parent_table_name, - write_disposition, + write_disposition or default_wd, columns, primary_key, merge_key, diff --git a/dlt/extract/incremental/__init__.py b/dlt/extract/incremental/__init__.py index 54e8b3d447..e74e87d094 100644 --- a/dlt/extract/incremental/__init__.py +++ b/dlt/extract/incremental/__init__.py @@ -6,6 +6,7 @@ import inspect from functools import wraps + import dlt from dlt.common.exceptions import MissingDependencyException from dlt.common import pendulum, logger @@ -163,11 +164,12 @@ def _make_transforms(self) -> None: self._transformers[dt] = kls( self.resource_name, self.cursor_path, + self.initial_value, self.start_value, self.end_value, - self._cached_state, self.last_value_func, self._primary_key, + set(self._cached_state["unique_hashes"]), ) @classmethod @@ -453,14 +455,28 @@ def __call__(self, rows: TDataItems, meta: Any = None) -> Optional[TDataItems]: return rows transformer = self._get_transformer(rows) - if isinstance(rows, list): - return [ + rows = [ item for item in (self._transform_item(transformer, row) for row in rows) if item is not None ] - return self._transform_item(transformer, rows) + else: + rows = self._transform_item(transformer, rows) + + # write back state + self._cached_state["last_value"] = transformer.last_value + if not transformer.deduplication_disabled: + # compute hashes for new last rows + unique_hashes = set( + transformer.compute_unique_value(row, self.primary_key) + for row in transformer.last_rows + ) + # add directly computed hashes + unique_hashes.update(transformer.unique_hashes) + self._cached_state["unique_hashes"] = list(unique_hashes) + + return rows Incremental.EMPTY = Incremental[Any]("") diff --git a/dlt/extract/incremental/transform.py b/dlt/extract/incremental/transform.py index e20617cf63..29b20de7b8 100644 --- a/dlt/extract/incremental/transform.py +++ b/dlt/extract/incremental/transform.py @@ -1,24 +1,23 @@ from datetime import datetime, date # noqa: I251 -from typing import Any, Optional, Tuple, List +from typing import Any, Optional, Set, Tuple, List from dlt.common.exceptions import MissingDependencyException from dlt.common.utils import digest128 from dlt.common.json import json from dlt.common import pendulum -from dlt.common.typing import TDataItem, TDataItems -from dlt.common.jsonpath import TJsonPath, find_values, 
JSONPathFields, compile_path +from dlt.common.typing import TDataItem +from dlt.common.jsonpath import find_values, JSONPathFields, compile_path from dlt.extract.incremental.exceptions import ( IncrementalCursorPathMissing, IncrementalPrimaryKeyMissing, ) -from dlt.extract.incremental.typing import IncrementalColumnState, TCursorValue, LastValueFunc +from dlt.extract.incremental.typing import TCursorValue, LastValueFunc from dlt.extract.utils import resolve_column_value from dlt.extract.items import TTableHintTemplate from dlt.common.schema.typing import TColumnNames try: from dlt.common.libs import pyarrow - from dlt.common.libs.pandas import pandas from dlt.common.libs.numpy import numpy from dlt.common.libs.pyarrow import pyarrow as pa, TAnyArrowItem from dlt.common.libs.pyarrow import from_arrow_scalar, to_arrow_scalar @@ -26,6 +25,11 @@ pa = None pyarrow = None numpy = None + +# NOTE: always import pandas independently from pyarrow +try: + from dlt.common.libs.pandas import pandas, pandas_to_arrow +except MissingDependencyException: pandas = None @@ -34,19 +38,24 @@ def __init__( self, resource_name: str, cursor_path: str, + initial_value: Optional[TCursorValue], start_value: Optional[TCursorValue], end_value: Optional[TCursorValue], - incremental_state: IncrementalColumnState, last_value_func: LastValueFunc[TCursorValue], primary_key: Optional[TTableHintTemplate[TColumnNames]], + unique_hashes: Set[str], ) -> None: self.resource_name = resource_name self.cursor_path = cursor_path + self.initial_value = initial_value self.start_value = start_value + self.last_value = start_value self.end_value = end_value - self.incremental_state = incremental_state + self.last_rows: List[TDataItem] = [] self.last_value_func = last_value_func self.primary_key = primary_key + self.unique_hashes = unique_hashes + self.start_unique_hashes = set(unique_hashes) # compile jsonpath self._compiled_cursor_path = compile_path(cursor_path) @@ -59,20 +68,17 @@ def __init__( self.cursor_path = self._compiled_cursor_path.fields[0] self._compiled_cursor_path = None - def __call__( - self, - row: TDataItem, - ) -> Tuple[bool, bool, bool]: ... - - -class JsonIncremental(IncrementalTransform): - def unique_value( + def compute_unique_value( self, row: TDataItem, primary_key: Optional[TTableHintTemplate[TColumnNames]], - resource_name: str, ) -> str: try: + assert not self.deduplication_disabled, ( + f"{self.resource_name}: Attempt to compute unique values when deduplication is" + " disabled" + ) + if primary_key: return digest128(json.dumps(resolve_column_value(primary_key, row), sort_keys=True)) elif primary_key is None: @@ -80,8 +86,20 @@ def unique_value( else: return None except KeyError as k_err: - raise IncrementalPrimaryKeyMissing(resource_name, k_err.args[0], row) + raise IncrementalPrimaryKeyMissing(self.resource_name, k_err.args[0], row) + def __call__( + self, + row: TDataItem, + ) -> Tuple[bool, bool, bool]: ... + + @property + def deduplication_disabled(self) -> bool: + """Skip deduplication when length of the key is 0""" + return isinstance(self.primary_key, (list, tuple)) and len(self.primary_key) == 0 + + +class JsonIncremental(IncrementalTransform): def find_cursor_value(self, row: TDataItem) -> Any: """Finds value in row at cursor defined by self.cursor_path. 
@@ -113,7 +131,8 @@ def __call__( return row, False, False row_value = self.find_cursor_value(row) - last_value = self.incremental_state["last_value"] + last_value = self.last_value + last_value_func = self.last_value_func # For datetime cursor, ensure the value is a timezone aware datetime. # The object saved in state will always be a tz aware pendulum datetime so this ensures values are comparable @@ -128,41 +147,45 @@ def __call__( # Check whether end_value has been reached # Filter end value ranges exclusively, so in case of "max" function we remove values >= end_value if self.end_value is not None and ( - self.last_value_func((row_value, self.end_value)) != self.end_value - or self.last_value_func((row_value,)) == self.end_value + last_value_func((row_value, self.end_value)) != self.end_value + or last_value_func((row_value,)) == self.end_value ): return None, False, True check_values = (row_value,) + ((last_value,) if last_value is not None else ()) - new_value = self.last_value_func(check_values) + new_value = last_value_func(check_values) + # new_value is "less" or equal to last_value (the actual max) if last_value == new_value: - processed_row_value = self.last_value_func((row_value,)) - # we store row id for all records with the current "last_value" in state and use it to deduplicate - - if processed_row_value == last_value: - unique_value = self.unique_value(row, self.primary_key, self.resource_name) - # if unique value exists then use it to deduplicate - if unique_value: - if unique_value in self.incremental_state["unique_hashes"]: - return None, False, False - # add new hash only if the record row id is same as current last value - self.incremental_state["unique_hashes"].append(unique_value) - return row, False, False - # skip the record that is not a last_value or new_value: that record was already processed + # use func to compute row_value into last_value compatible + processed_row_value = last_value_func((row_value,)) + # skip the record that is not a start_value or new_value: that record was already processed check_values = (row_value,) + ( (self.start_value,) if self.start_value is not None else () ) - new_value = self.last_value_func(check_values) + new_value = last_value_func(check_values) # Include rows == start_value but exclude "lower" - if new_value == self.start_value and processed_row_value != self.start_value: - return None, True, False - else: - return row, False, False + # new_value is "less" or equal to start_value (the initial max) + if new_value == self.start_value: + # if equal there's still a chance that item gets in + if processed_row_value == self.start_value: + if not self.deduplication_disabled: + unique_value = self.compute_unique_value(row, self.primary_key) + # if unique value exists then use it to deduplicate + if unique_value in self.start_unique_hashes: + return None, True, False + else: + # smaller than start value: gets out + return None, True, False + + # we store row id for all records with the current "last_value" in state and use it to deduplicate + if processed_row_value == last_value: + # add new hash only if the record row id is same as current last value + self.last_rows.append(row) else: - self.incremental_state["last_value"] = new_value - unique_value = self.unique_value(row, self.primary_key, self.resource_name) - if unique_value: - self.incremental_state["unique_hashes"] = [unique_value] + self.last_value = new_value + # store rows with "max" values to compute hashes after processing full batch + self.last_rows = [row] + 
self.unique_hashes = set() return row, False, False @@ -170,21 +193,25 @@ def __call__( class ArrowIncremental(IncrementalTransform): _dlt_index = "_dlt_index" - def unique_values( - self, item: "TAnyArrowItem", unique_columns: List[str], resource_name: str + def compute_unique_values(self, item: "TAnyArrowItem", unique_columns: List[str]) -> List[str]: + if not unique_columns: + return [] + rows = item.select(unique_columns).to_pylist() + return [self.compute_unique_value(row, self.primary_key) for row in rows] + + def compute_unique_values_with_index( + self, item: "TAnyArrowItem", unique_columns: List[str] ) -> List[Tuple[int, str]]: if not unique_columns: return [] - item = item indices = item[self._dlt_index].to_pylist() rows = item.select(unique_columns).to_pylist() return [ - (index, digest128(json.dumps(row, sort_keys=True))) for index, row in zip(indices, rows) + (index, self.compute_unique_value(row, self.primary_key)) + for index, row in zip(indices, rows) ] - def _deduplicate( - self, tbl: "pa.Table", unique_columns: Optional[List[str]], aggregate: str, cursor_path: str - ) -> "pa.Table": + def _add_unique_index(self, tbl: "pa.Table") -> "pa.Table": """Creates unique index if necessary.""" # create unique index if necessary if self._dlt_index not in tbl.schema.names: @@ -197,7 +224,7 @@ def __call__( ) -> Tuple[TDataItem, bool, bool]: is_pandas = pandas is not None and isinstance(tbl, pandas.DataFrame) if is_pandas: - tbl = pa.Table.from_pandas(tbl) + tbl = pandas_to_arrow(tbl) primary_key = self.primary_key(tbl) if callable(self.primary_key) else self.primary_key if primary_key: @@ -215,24 +242,18 @@ def __call__( self._dlt_index = primary_key elif primary_key is None: unique_columns = tbl.schema.names - else: # deduplicating is disabled - unique_columns = None start_out_of_range = end_out_of_range = False if not tbl: # row is None or empty arrow table return tbl, start_out_of_range, end_out_of_range - last_value = self.incremental_state["last_value"] - if self.last_value_func is max: compute = pa.compute.max - aggregate = "max" end_compare = pa.compute.less last_value_compare = pa.compute.greater_equal new_value_compare = pa.compute.greater elif self.last_value_func is min: compute = pa.compute.min - aggregate = "min" end_compare = pa.compute.greater last_value_compare = pa.compute.less_equal new_value_compare = pa.compute.less @@ -267,64 +288,56 @@ def __call__( # NOTE: pyarrow bool *always* evaluates to python True. 
`as_py()` is necessary end_out_of_range = not end_compare(row_value_scalar, end_value_scalar).as_py() - if last_value is not None: - if self.start_value is not None: - # Remove rows lower than the last start value - keep_filter = last_value_compare( - tbl[cursor_path], to_arrow_scalar(self.start_value, cursor_data_type) + if self.start_value is not None: + start_value_scalar = to_arrow_scalar(self.start_value, cursor_data_type) + # Remove rows lower or equal than the last start value + keep_filter = last_value_compare(tbl[cursor_path], start_value_scalar) + start_out_of_range = bool(pa.compute.any(pa.compute.invert(keep_filter)).as_py()) + tbl = tbl.filter(keep_filter) + if not self.deduplication_disabled: + # Deduplicate after filtering old values + tbl = self._add_unique_index(tbl) + # Remove already processed rows where the cursor is equal to the start value + eq_rows = tbl.filter(pa.compute.equal(tbl[cursor_path], start_value_scalar)) + # compute index, unique hash mapping + unique_values_index = self.compute_unique_values_with_index(eq_rows, unique_columns) + unique_values_index = [ + (i, uq_val) + for i, uq_val in unique_values_index + if uq_val in self.start_unique_hashes + ] + # find rows with unique ids that were stored from previous run + remove_idx = pa.array(i for i, _ in unique_values_index) + # Filter the table + tbl = tbl.filter( + pa.compute.invert(pa.compute.is_in(tbl[self._dlt_index], remove_idx)) ) - start_out_of_range = bool(pa.compute.any(pa.compute.invert(keep_filter)).as_py()) - tbl = tbl.filter(keep_filter) - - # Deduplicate after filtering old values - last_value_scalar = to_arrow_scalar(last_value, cursor_data_type) - tbl = self._deduplicate(tbl, unique_columns, aggregate, cursor_path) - # Remove already processed rows where the cursor is equal to the last value - eq_rows = tbl.filter(pa.compute.equal(tbl[cursor_path], last_value_scalar)) - # compute index, unique hash mapping - unique_values = self.unique_values(eq_rows, unique_columns, self.resource_name) - unique_values = [ - (i, uq_val) - for i, uq_val in unique_values - if uq_val in self.incremental_state["unique_hashes"] - ] - remove_idx = pa.array(i for i, _ in unique_values) - # Filter the table - tbl = tbl.filter(pa.compute.invert(pa.compute.is_in(tbl[self._dlt_index], remove_idx))) - - if ( - new_value_compare(row_value_scalar, last_value_scalar).as_py() - and row_value != last_value - ): # Last value has changed - self.incremental_state["last_value"] = row_value + + if ( + self.last_value is None + or new_value_compare( + row_value_scalar, to_arrow_scalar(self.last_value, cursor_data_type) + ).as_py() + ): # Last value has changed + self.last_value = row_value + if not self.deduplication_disabled: # Compute unique hashes for all rows equal to row value - self.incremental_state["unique_hashes"] = [ - uq_val - for _, uq_val in self.unique_values( + self.unique_hashes = set( + self.compute_unique_values( tbl.filter(pa.compute.equal(tbl[cursor_path], row_value_scalar)), unique_columns, - self.resource_name, - ) - ] - else: - # last value is unchanged, add the hashes - self.incremental_state["unique_hashes"] = list( - set( - self.incremental_state["unique_hashes"] - + [uq_val for _, uq_val in unique_values] ) ) - else: - tbl = self._deduplicate(tbl, unique_columns, aggregate, cursor_path) - self.incremental_state["last_value"] = row_value - self.incremental_state["unique_hashes"] = [ - uq_val - for _, uq_val in self.unique_values( - tbl.filter(pa.compute.equal(tbl[cursor_path], row_value_scalar)), - 
unique_columns, - self.resource_name, + elif self.last_value == row_value and not self.deduplication_disabled: + # last value is unchanged, add the hashes + self.unique_hashes.update( + set( + self.compute_unique_values( + tbl.filter(pa.compute.equal(tbl[cursor_path], row_value_scalar)), + unique_columns, + ) ) - ] + ) if len(tbl) == 0: return None, start_out_of_range, end_out_of_range diff --git a/dlt/extract/wrappers.py b/dlt/extract/wrappers.py index 7ffb6b4fc6..e761fcdeab 100644 --- a/dlt/extract/wrappers.py +++ b/dlt/extract/wrappers.py @@ -6,11 +6,17 @@ try: from dlt.common.libs.pandas import pandas + + PandaFrame = pandas.DataFrame +except MissingDependencyException: + PandaFrame = NoneType + +try: from dlt.common.libs.pyarrow import pyarrow - PandaFrame, ArrowTable, ArrowRecords = pandas.DataFrame, pyarrow.Table, pyarrow.RecordBatch + ArrowTable, ArrowRecords = pyarrow.Table, pyarrow.RecordBatch except MissingDependencyException: - PandaFrame, ArrowTable, ArrowRecords = NoneType, NoneType, NoneType + ArrowTable, ArrowRecords = NoneType, NoneType def wrap_additional_type(data: Any) -> Any: diff --git a/dlt/helpers/airflow_helper.py b/dlt/helpers/airflow_helper.py index 9a6616e9ea..e01cf790d2 100644 --- a/dlt/helpers/airflow_helper.py +++ b/dlt/helpers/airflow_helper.py @@ -1,7 +1,7 @@ import functools import os from tempfile import gettempdir -from typing import Any, Callable, List, Literal, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple from tenacity import ( retry_if_exception, @@ -103,6 +103,7 @@ def __init__( """ super().__init__(group_id=pipeline_name, **kwargs) + self._used_names: Dict[str, Any] = {} self.use_task_logger = use_task_logger self.log_progress_period = log_progress_period self.buffer_max_items = buffer_max_items @@ -132,6 +133,33 @@ def __init__( if ConfigProvidersContext in Container(): del Container()[ConfigProvidersContext] + def _task_name(self, pipeline: Pipeline, data: Any) -> str: + """Generate a task name. + + Args: + pipeline (Pipeline): The pipeline to run. + data (Any): The data to run the pipeline with. + + Returns: + str: The name of the task. 
+ """ + task_name = pipeline.pipeline_name + + if isinstance(data, DltSource): + resource_names = list(data.selected_resources.keys()) + task_name = data.name + "_" + "-".join(resource_names[:4]) + + if len(resource_names) > 4: + task_name += f"-{len(resource_names)-4}-more" + + num = self._used_names.setdefault(task_name, 0) + self._used_names[task_name] = num + 1 + + if num: + task_name += f"-{num + 1}" + + return task_name + def run( self, pipeline: Pipeline, @@ -175,7 +203,7 @@ def run( schema_contract=schema_contract, pipeline_name=pipeline_name, ) - return PythonOperator(task_id=_task_name(pipeline, data), python_callable=f, **kwargs) + return PythonOperator(task_id=self._task_name(pipeline, data), python_callable=f, **kwargs) def _run( self, @@ -363,7 +391,7 @@ def make_task(pipeline: Pipeline, data: Any, name: str = None) -> PythonOperator pipeline_name=name, ) return PythonOperator( - task_id=_task_name(pipeline, data), python_callable=f, **kwargs + task_id=self._task_name(pipeline, data), python_callable=f, **kwargs ) if decompose == "none": @@ -393,7 +421,7 @@ def make_task(pipeline: Pipeline, data: Any, name: str = None) -> PythonOperator tasks = [] sources = data.decompose("scc") - t_name = _task_name(pipeline, data) + t_name = self._task_name(pipeline, data) start = make_task(pipeline, sources[0]) # parallel tasks @@ -434,16 +462,18 @@ def make_task(pipeline: Pipeline, data: Any, name: str = None) -> PythonOperator start = make_task( pipeline, sources[0], - naming.normalize_identifier(_task_name(pipeline, sources[0])), + naming.normalize_identifier(self._task_name(pipeline, sources[0])), ) # parallel tasks for source in sources[1:]: # name pipeline the same as task - new_pipeline_name = naming.normalize_identifier(_task_name(pipeline, source)) + new_pipeline_name = naming.normalize_identifier( + self._task_name(pipeline, source) + ) tasks.append(make_task(pipeline, source, new_pipeline_name)) - t_name = _task_name(pipeline, data) + t_name = self._task_name(pipeline, data) end = DummyOperator(task_id=f"{t_name}_end") if tasks: @@ -468,25 +498,3 @@ def airflow_get_execution_dates() -> Tuple[pendulum.DateTime, Optional[pendulum. return context["data_interval_start"], context["data_interval_end"] except Exception: return None, None - - -def _task_name(pipeline: Pipeline, data: Any) -> str: - """Generate a task name. - - Args: - pipeline (Pipeline): The pipeline to run. - data (Any): The data to run the pipeline with. - - Returns: - str: The name of the task. 
- """ - task_name = pipeline.pipeline_name - - if isinstance(data, DltSource): - resource_names = list(data.selected_resources.keys()) - task_name = data.name + "_" + "-".join(resource_names[:4]) - - if len(resource_names) > 4: - task_name += f"-{len(resource_names)-4}-more" - - return task_name diff --git a/dlt/helpers/streamlit_app/__init__.py b/dlt/helpers/streamlit_app/__init__.py new file mode 100644 index 0000000000..b304195a5a --- /dev/null +++ b/dlt/helpers/streamlit_app/__init__.py @@ -0,0 +1,11 @@ +from dlt.common.exceptions import MissingDependencyException + +# FIXME: Remove this after implementing package installer +try: + import streamlit +except ModuleNotFoundError: + raise MissingDependencyException( + "DLT Streamlit Helpers", + ["streamlit"], + "DLT Helpers for Streamlit should be run within a streamlit app.", + ) diff --git a/dlt/helpers/streamlit_app/blocks/__init__.py b/dlt/helpers/streamlit_app/blocks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dlt/helpers/streamlit_app/blocks/load_info.py b/dlt/helpers/streamlit_app/blocks/load_info.py new file mode 100644 index 0000000000..134b5ad5a4 --- /dev/null +++ b/dlt/helpers/streamlit_app/blocks/load_info.py @@ -0,0 +1,40 @@ +import dlt +import humanize +import streamlit as st + +from dlt.common import pendulum +from dlt.helpers.streamlit_app.utils import query_data_live +from dlt.helpers.streamlit_app.widgets import stat + + +def last_load_info(pipeline: dlt.Pipeline) -> None: + loads_df = query_data_live( + pipeline, + f"SELECT load_id, inserted_at FROM {pipeline.default_schema.loads_table_name} WHERE" + " status = 0 ORDER BY inserted_at DESC LIMIT 101 ", + ) + + if loads_df is None: + st.error( + "Load info is not available", + icon="🚨", + ) + else: + loads_no = loads_df.shape[0] + if loads_df.shape[0] > 0: + rel_time = ( + humanize.naturaldelta( + pendulum.now() - pendulum.from_timestamp(loads_df.iloc[0, 1].timestamp()) + ) + + " ago" + ) + last_load_id = loads_df.iloc[0, 0] + if loads_no > 100: + loads_no = "> " + str(loads_no) + else: + rel_time = "---" + last_load_id = "---" + + stat("Last load time", rel_time, border_left_width=4) + stat("Last load id", last_load_id) + stat("Total number of loads", loads_no) diff --git a/dlt/helpers/streamlit_app/blocks/menu.py b/dlt/helpers/streamlit_app/blocks/menu.py new file mode 100644 index 0000000000..b6d0b5f7aa --- /dev/null +++ b/dlt/helpers/streamlit_app/blocks/menu.py @@ -0,0 +1,14 @@ +import dlt +import streamlit as st + +from dlt.helpers.streamlit_app.utils import HERE +from dlt.helpers.streamlit_app.widgets import logo, mode_selector +from dlt.helpers.streamlit_app.widgets import pipeline_summary + + +def menu(pipeline: dlt.Pipeline) -> None: + mode_selector() + logo() + st.page_link(f"{HERE}/pages/dashboard.py", label="Explore data", icon="🕹️") + st.page_link(f"{HERE}/pages/load_info.py", label="Load info", icon="💾") + pipeline_summary(pipeline) diff --git a/dlt/helpers/streamlit_app/blocks/query.py b/dlt/helpers/streamlit_app/blocks/query.py new file mode 100644 index 0000000000..a03e9a0cd9 --- /dev/null +++ b/dlt/helpers/streamlit_app/blocks/query.py @@ -0,0 +1,57 @@ +from typing import Optional +import dlt +import streamlit as st + +from dlt.common.exceptions import MissingDependencyException +from dlt.helpers.streamlit_app.utils import query_data + + +def maybe_run_query( + pipeline: dlt.Pipeline, + show_charts: bool = True, + example_query: Optional[str] = "", +) -> None: + st.subheader("Run your query") + sql_query = 
st.text_area("Enter your SQL query", value=example_query) + if st.button("Run Query"): + if sql_query: + try: + # run the query from the text area + df = query_data(pipeline, sql_query, chunk_size=2048) + if df is None: + st.text("No rows returned") + else: + rows_count = df.shape[0] + st.text(f"{rows_count} row(s) returned") + st.dataframe(df) + try: + # now if the dataset has supported shape try to display the bar or altair chart + if df.dtypes.shape[0] == 1 and show_charts: + # try barchart + st.bar_chart(df) + if df.dtypes.shape[0] == 2 and show_charts: + # try to import altair charts + try: + import altair as alt + except ModuleNotFoundError: + raise MissingDependencyException( + "DLT Streamlit Helpers", + ["altair"], + "DLT Helpers for Streamlit should be run within a streamlit" + " app.", + ) + + # try altair + bar_chart = ( + alt.Chart(df) + .mark_bar() + .encode( + x=f"{df.columns[1]}:Q", y=alt.Y(f"{df.columns[0]}:N", sort="-x") + ) + ) + st.altair_chart(bar_chart, use_container_width=True) + except Exception as ex: + st.error(f"Chart failed due to: {ex}") + except Exception as ex: + st.text("Exception when running query") + st.exception(ex) diff --git a/dlt/helpers/streamlit_app/blocks/resource_state.py b/dlt/helpers/streamlit_app/blocks/resource_state.py new file mode 100644 index 0000000000..8ea1256a1f --- /dev/null +++ b/dlt/helpers/streamlit_app/blocks/resource_state.py @@ -0,0 +1,29 @@ +import dlt +import streamlit as st +import yaml + +from dlt.common import json +from dlt.common.libs.pandas import pandas as pd +from dlt.common.pipeline import resource_state, TSourceState +from dlt.common.schema.utils import group_tables_by_resource +from dlt.helpers.streamlit_app.widgets.tags import tag + + +def resource_state_info( + pipeline: dlt.Pipeline, + schema_name: str, + resource_name: str, +) -> None: + sources_state = pipeline.state.get("sources") or {} + schema = sources_state.get(schema_name) + if not schema: + st.error(f"Schema with name: {schema_name} is not found") + return + + resource = schema["resources"].get(resource_name) + with st.expander("Resource state", expanded=(resource is None)): + if not resource: + st.info(f"{resource_name} is missing resource state") + else: + spec = yaml.safe_dump(resource) + st.code(spec, language="yaml") diff --git a/dlt/helpers/streamlit_app/blocks/show_data.py b/dlt/helpers/streamlit_app/blocks/show_data.py new file mode 100644 index 0000000000..7aaab084f3 --- /dev/null +++ b/dlt/helpers/streamlit_app/blocks/show_data.py @@ -0,0 +1,21 @@ +from typing import List + +import dlt +import streamlit as st + +from dlt.helpers.streamlit_app.utils import query_data + + +def show_data_button(pipeline: dlt.Pipeline, table_name: str) -> None: + if st.button("SHOW DATA", key=table_name): + df = query_data(pipeline, f"SELECT * FROM {table_name}", chunk_size=2048) + if df is None: + st.text("No rows returned") + else: + rows_count = df.shape[0] + if df.shape[0] < 2048: + st.text(f"All {rows_count} row(s)") + else: + st.text(f"Top {rows_count} row(s)") + + st.dataframe(df) diff --git a/dlt/helpers/streamlit_app/blocks/table_hints.py b/dlt/helpers/streamlit_app/blocks/table_hints.py new file mode 100644 index 0000000000..aefab952e5 --- /dev/null +++ b/dlt/helpers/streamlit_app/blocks/table_hints.py @@ -0,0 +1,80 @@ +from typing import Any, Dict, List + +import dlt +import streamlit as st + +from dlt.common.schema.typing import TTableSchema +from dlt.common.utils import flatten_list_or_items +from dlt.helpers.streamlit_app.blocks.resource_state import 
resource_state_info +from dlt.helpers.streamlit_app.blocks.show_data import show_data_button + + +def list_table_hints(pipeline: dlt.Pipeline, tables: List[TTableSchema]) -> None: + current_schema = st.session_state["schema"] or pipeline.default_schema + if st.session_state["schema"]: + current_schema = st.session_state["schema"] + + for table in tables: + table_hints: List[str] = [] + if "parent" in table: + table_hints.append("parent: **%s**" % table["parent"]) + + if "resource" in table: + table_hints.append("resource: **%s**" % table["resource"]) + + if "write_disposition" in table: + table_hints.append("write disposition: **%s**" % table["write_disposition"]) + + columns = table["columns"] + primary_keys: List[str] = list( + flatten_list_or_items( + [ + col_name + for col_name in columns.keys() + if not col_name.startswith("_") + and columns[col_name].get("primary_key") is not None + ] + ) + ) + if primary_keys: + table_hints.append("primary key(s): **%s**" % ", ".join(primary_keys)) + + merge_keys = list( + flatten_list_or_items( + [ + col_name + for col_name in columns.keys() + if not col_name.startswith("_") + and not columns[col_name].get("merge_key") is None # noqa: E714 + ] + ) + ) + + if merge_keys: + table_hints.append("merge key(s): **%s**" % ", ".join(merge_keys)) + + st.subheader(f"Table: {table['name']}", divider=True) + st.markdown(" | ".join(table_hints)) + if "resource" in table: + resource_state_info( + pipeline, + current_schema.name, + table["resource"], + ) + + # table schema contains various hints (like clustering or partition options) + # that we do not want to show in basic view + def essentials_f(c: Any) -> Dict[str, Any]: + essentials: Dict[str, Any] = {} + for k, v in c.items(): + if k in ["name", "data_type", "nullable"]: + essentials[k] = v + + return { + "name": essentials["name"], + "data_type": essentials["data_type"], + "nullable": essentials["nullable"], + } + + st.table(map(essentials_f, table["columns"].values())) + show_data_button(pipeline, table["name"]) diff --git a/dlt/helpers/streamlit_app/index.py b/dlt/helpers/streamlit_app/index.py new file mode 100644 index 0000000000..31fb470640 --- /dev/null +++ b/dlt/helpers/streamlit_app/index.py @@ -0,0 +1,6 @@ +import streamlit as st + +from dlt.helpers.streamlit_app.utils import HERE + +if __name__ == "__main__": + st.switch_page(f"{HERE}/pages/dashboard.py") diff --git a/dlt/helpers/streamlit_app/pages/__init__.py b/dlt/helpers/streamlit_app/pages/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dlt/helpers/streamlit_app/pages/dashboard.py b/dlt/helpers/streamlit_app/pages/dashboard.py new file mode 100644 index 0000000000..656dd6ecdf --- /dev/null +++ b/dlt/helpers/streamlit_app/pages/dashboard.py @@ -0,0 +1,53 @@ +import dlt +import streamlit as st + +from dlt.helpers.streamlit_app.blocks.query import maybe_run_query +from dlt.helpers.streamlit_app.blocks.table_hints import list_table_hints +from dlt.helpers.streamlit_app.blocks.menu import menu +from dlt.helpers.streamlit_app.utils import render_with_pipeline +from dlt.helpers.streamlit_app.widgets import schema_picker +from dlt.pipeline import Pipeline + + +def write_data_explorer_page( + pipeline: Pipeline, + schema_name: str = None, + example_query: str = "", + show_charts: bool = True, +) -> None: + """Writes Streamlit app page with a schema and live data preview. + + #### Args: + pipeline (Pipeline): Pipeline instance to use. + schema_name (str, optional): Name of the schema to display. 
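As an aside on the hint extraction in `list_table_hints` above: only columns whose names do not start with `_` are considered, and a column counts as a key when its `primary_key` or `merge_key` hint is set. A minimal sketch with a hand-written columns dict (the table layout is invented; the real code additionally runs the results through `flatten_list_or_items`):

```py
columns = {
    "id": {"name": "id", "data_type": "bigint", "nullable": False, "primary_key": True},
    "email": {"name": "email", "data_type": "text", "nullable": True, "merge_key": True},
    "_dlt_id": {"name": "_dlt_id", "data_type": "text", "nullable": False, "unique": True},
}

primary_keys = [
    name
    for name, col in columns.items()
    if not name.startswith("_") and col.get("primary_key") is not None
]
merge_keys = [
    name
    for name, col in columns.items()
    if not name.startswith("_") and col.get("merge_key") is not None
]
print(primary_keys, merge_keys)  # ['id'] ['email']
```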
If None, default schema is used. + example_query (str, optional): Example query to be displayed in the SQL Query box. + show_charts (bool, optional): Should automatically show charts for the queries from SQL Query box. Defaults to True. + + Raises: + MissingDependencyException: Raised when a particular python dependency is not installed + """ + + st.subheader("Schemas and tables", divider="rainbow") + schema_picker(pipeline) + tables = sorted( + st.session_state["schema"].data_tables(), + key=lambda table: table["name"], + ) + + list_table_hints(pipeline, tables) + maybe_run_query( + pipeline, + show_charts=show_charts, + example_query=example_query, + ) + + +def show(pipeline: dlt.Pipeline) -> None: + with st.sidebar: + menu(pipeline) + + write_data_explorer_page(pipeline) + + +if __name__ == "__main__": + render_with_pipeline(show) diff --git a/dlt/helpers/streamlit_app/pages/load_info.py b/dlt/helpers/streamlit_app/pages/load_info.py new file mode 100644 index 0000000000..ee13cf2531 --- /dev/null +++ b/dlt/helpers/streamlit_app/pages/load_info.py @@ -0,0 +1,130 @@ +import dlt +import streamlit as st + +from dlt.common.configuration.exceptions import ConfigFieldMissingException +from dlt.common.destination.reference import WithStateSync +from dlt.helpers.streamlit_app.blocks.load_info import last_load_info +from dlt.helpers.streamlit_app.blocks.menu import menu +from dlt.helpers.streamlit_app.widgets import stat +from dlt.helpers.streamlit_app.utils import ( + query_data, + query_data_live, + render_with_pipeline, +) +from dlt.pipeline import Pipeline +from dlt.pipeline.exceptions import CannotRestorePipelineException +from dlt.pipeline.state_sync import load_pipeline_state_from_destination + + +def write_load_status_page(pipeline: Pipeline) -> None: + """Display pipeline loading information.""" + + try: + loads_df = query_data_live( + pipeline, + f"SELECT load_id, inserted_at FROM {pipeline.default_schema.loads_table_name} WHERE" + " status = 0 ORDER BY inserted_at DESC LIMIT 101 ", + ) + + if loads_df is not None: + selected_load_id = st.selectbox("Select load id", loads_df) + schema = pipeline.default_schema + + st.markdown("**Number of loaded rows:**") + + # construct a union query + query_parts = [] + for table in schema.data_tables(): + if "parent" in table: + continue + table_name = table["name"] + query_parts.append( + f"SELECT '{table_name}' as table_name, COUNT(1) As rows_count FROM" + f" {table_name} WHERE _dlt_load_id = '{selected_load_id}'" + ) + query_parts.append("UNION ALL") + + query_parts.pop() + rows_counts_df = query_data(pipeline, "\n".join(query_parts)) + + st.markdown(f"Rows loaded in **{selected_load_id}**") + st.dataframe(rows_counts_df) + + st.markdown("**Last 100 loads**") + st.dataframe(loads_df) + + st.subheader("Schema updates", divider=True) + schemas_df = query_data_live( + pipeline, + "SELECT schema_name, inserted_at, version, version_hash FROM" + f" {pipeline.default_schema.version_table_name} ORDER BY inserted_at DESC LIMIT" + " 101 ", + ) + st.markdown("**100 recent schema updates**") + st.dataframe(schemas_df) + except CannotRestorePipelineException as restore_ex: + st.error("Seems like the pipeline does not exist. Did you run it at least once?") + st.exception(restore_ex) + + except ConfigFieldMissingException as cf_ex: + st.error( + "Pipeline credentials/configuration is missing. This most often happen when you run the" + " streamlit app from different folder than the `.dlt` with `toml` files resides." 
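For context, the row-count block in `write_load_status_page` above stitches one `UNION ALL` statement over all root tables of the default schema. A sketch of what gets generated for two hypothetical tables and an invented load id:

```py
# table names and load id are made up; child tables (those with a "parent" hint) are skipped
query_parts = []
for table_name in ["customers", "orders"]:
    query_parts.append(
        f"SELECT '{table_name}' as table_name, COUNT(1) As rows_count FROM"
        f" {table_name} WHERE _dlt_load_id = '1234'"
    )
    query_parts.append("UNION ALL")
query_parts.pop()  # drop the trailing UNION ALL
print("\n".join(query_parts))
```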
+ ) + st.text(str(cf_ex)) + + except Exception as ex: + st.error("Pipeline info could not be prepared. Did you load the data at least once?") + st.exception(ex) + + +def show_state_versions(pipeline: dlt.Pipeline) -> None: + st.subheader("State info", divider=True) + remote_state = None + with pipeline.destination_client() as client: + if isinstance(client, WithStateSync): + remote_state = load_pipeline_state_from_destination(pipeline.pipeline_name, client) + + local_state = pipeline.state + + remote_state_version = "---" + if remote_state: + remote_state_version = str(remote_state["_state_version"]) + + col1, col2 = st.columns(2) + with col1: + stat( + label="Local version", + value=local_state["_state_version"], + display="block", + border_left_width=4, + ) + + with col2: + stat( + label="Remote version", + value=remote_state_version, + display="block", + border_left_width=4, + ) + + if remote_state_version != str(local_state["_state_version"]): + st.text("") + st.warning( + "Looks like that local state is not yet synchronized or synchronization is disabled", + icon="⚠️", + ) + + +def show(pipeline: dlt.Pipeline) -> None: + st.subheader("Load info", divider="rainbow") + last_load_info(pipeline) + write_load_status_page(pipeline) + show_state_versions(pipeline) + + with st.sidebar: + menu(pipeline) + + +if __name__ == "__main__": + render_with_pipeline(show) diff --git a/dlt/helpers/streamlit_app/theme.py b/dlt/helpers/streamlit_app/theme.py new file mode 100644 index 0000000000..3b6b600a73 --- /dev/null +++ b/dlt/helpers/streamlit_app/theme.py @@ -0,0 +1,29 @@ +import streamlit as st + + +def dark_theme() -> None: + st.config.set_option("theme.base", "dark") + st.config.set_option("theme.primaryColor", "#191937") + + # Main background + st.config.set_option("theme.backgroundColor", "#4C4898") + + # Sidebar + st.config.set_option("theme.secondaryBackgroundColor", "#191937") + + # Text + st.config.set_option("theme.textColor", "#FEFEFA") + + +def light_theme() -> None: + st.config.set_option("theme.base", "light") + st.config.set_option("theme.primaryColor", "#333") + + # Main background + st.config.set_option("theme.backgroundColor", "#FEFEFE") + + # Sidebar + st.config.set_option("theme.secondaryBackgroundColor", "#ededed") + + # Text + st.config.set_option("theme.textColor", "#333") diff --git a/dlt/helpers/streamlit_app/utils.py b/dlt/helpers/streamlit_app/utils.py new file mode 100644 index 0000000000..6b2dab495c --- /dev/null +++ b/dlt/helpers/streamlit_app/utils.py @@ -0,0 +1,77 @@ +import argparse +import os + +from pathlib import Path +from typing import Optional, Callable, Tuple + +import dlt +import pandas as pd +import streamlit as st + +from dlt.cli import echo as fmt +from dlt.pipeline.exceptions import SqlClientNotAvailable + +HERE = Path(__file__).absolute().parent + + +def parse_args() -> Tuple[str, str]: + parser = argparse.ArgumentParser() + parser.add_argument("pipeline_name", nargs=1) + parser.add_argument( + "--pipelines-dir", + help="Pipelines working directory", + default=None, + ) + known_options, _ = parser.parse_known_args() + return known_options.pipeline_name[0], known_options.pipelines_dir + + +def render_with_pipeline(render_func: Callable[..., None]) -> None: + pipeline_name, pipelines_dir = parse_args() + if test_pipeline_name := os.getenv("DLT_TEST_PIPELINE_NAME"): + fmt.echo(f"RUNNING TEST PIPELINE: {test_pipeline_name}") + pipeline_name = test_pipeline_name + + st.session_state["pipeline_name"] = pipeline_name + # use pipelines dir from env var or try to 
resolve it using get_dlt_pipelines_dir + pipeline = dlt.attach(pipeline_name, pipelines_dir=pipelines_dir) + render_func(pipeline) + + +def query_using_cache( + pipeline: dlt.Pipeline, ttl: int +) -> Callable[..., Optional[pd.DataFrame]]: + @st.cache_data(ttl=ttl) + def do_query( # type: ignore[return] + query: str, + schema_name: str = None, + chunk_size: Optional[int] = None, + ) -> Optional[pd.DataFrame]: + try: + with pipeline.sql_client(schema_name) as client: + with client.execute_query(query) as curr: + return curr.df(chunk_size=chunk_size) + except SqlClientNotAvailable: + st.error("🚨 Cannot load data - SqlClient not available") + + return do_query # type: ignore + + +def query_data( + pipeline: dlt.Pipeline, + query: str, + schema_name: str = None, + chunk_size: Optional[int] = None, +) -> pd.DataFrame: + query_maker = query_using_cache(pipeline, ttl=600) + return query_maker(query, schema_name, chunk_size=chunk_size) + + +def query_data_live( + pipeline: dlt.Pipeline, + query: str, + schema_name: str = None, + chunk_size: Optional[int] = None, +) -> pd.DataFrame: + query_maker = query_using_cache(pipeline, ttl=5) + return query_maker(query, schema_name, chunk_size=chunk_size) diff --git a/dlt/helpers/streamlit_app/widgets/__init__.py b/dlt/helpers/streamlit_app/widgets/__init__.py new file mode 100644 index 0000000000..349d58166e --- /dev/null +++ b/dlt/helpers/streamlit_app/widgets/__init__.py @@ -0,0 +1,6 @@ +from dlt.helpers.streamlit_app.widgets.logo import logo +from dlt.helpers.streamlit_app.widgets.stats import stat +from dlt.helpers.streamlit_app.widgets.summary import pipeline_summary +from dlt.helpers.streamlit_app.widgets.tags import tag +from dlt.helpers.streamlit_app.widgets.schema import schema_picker +from dlt.helpers.streamlit_app.widgets.color_mode_selector import mode_selector diff --git a/dlt/helpers/streamlit_app/widgets/color_mode_selector.py b/dlt/helpers/streamlit_app/widgets/color_mode_selector.py new file mode 100644 index 0000000000..fba3231a34 --- /dev/null +++ b/dlt/helpers/streamlit_app/widgets/color_mode_selector.py @@ -0,0 +1,34 @@ +import streamlit as st + +from typing_extensions import Callable, Literal + +from dlt.helpers.streamlit_app.theme import dark_theme, light_theme + +ColorMode = Literal["light", "dark"] + + +def set_color_mode(mode: ColorMode) -> Callable[..., None]: + def set_mode() -> None: + st.session_state["color_mode"] = mode + if mode and mode == "dark": + dark_theme() + else: + light_theme() + + return set_mode + + +def mode_selector() -> None: + columns = st.columns(10) + light = columns[3] + dark = columns[5] + + # Set default theme to light if it wasn't set before + if not st.session_state.get("color_mode"): + st.session_state["color_mode"] = "light" + st.config.set_option("theme.base", "light") + + with light: + st.button("☀️", on_click=set_color_mode("light")) + with dark: + st.button("🌚", on_click=set_color_mode("dark")) diff --git a/dlt/helpers/streamlit_app/widgets/logo.py b/dlt/helpers/streamlit_app/widgets/logo.py new file mode 100644 index 0000000000..41a5afff44 --- /dev/null +++ b/dlt/helpers/streamlit_app/widgets/logo.py @@ -0,0 +1,46 @@ +import streamlit as st + + +def logo() -> None: + logo_text = """ + + """ + styles = """ + + """ + + st.markdown(logo_text + styles, unsafe_allow_html=True) diff --git a/dlt/helpers/streamlit_app/widgets/schema.py b/dlt/helpers/streamlit_app/widgets/schema.py new file mode 100644 index 0000000000..f7883bc45e --- /dev/null +++ b/dlt/helpers/streamlit_app/widgets/schema.py @@ -0,0 
+1,21 @@ +import dlt +import streamlit as st + + +def schema_picker(pipeline: dlt.Pipeline) -> None: + schema = None + num_schemas = len(pipeline.schema_names) + if num_schemas == 1: + schema_name = pipeline.schema_names[0] + schema = pipeline.schemas.get(schema_name) + elif num_schemas > 1: + text = "Select schema" + selected_schema_name = st.selectbox( + text, + sorted(pipeline.schema_names), + ) + schema = pipeline.schemas.get(selected_schema_name) + + if schema: + st.subheader(f"Schema: {schema.name}") + st.session_state["schema"] = schema diff --git a/dlt/helpers/streamlit_app/widgets/stats.py b/dlt/helpers/streamlit_app/widgets/stats.py new file mode 100644 index 0000000000..d0fded508b --- /dev/null +++ b/dlt/helpers/streamlit_app/widgets/stats.py @@ -0,0 +1,58 @@ +from typing import Any, Optional +import streamlit as st + + +def stat( + label: str, + value: Any, + width: Optional[str] = "100%", + display: Optional[str] = "inline-block", + background_color: Optional[str] = "#0e1111", + border_radius: Optional[int] = 4, + border_color: Optional[str] = "#272736", + border_left_color: Optional[str] = "#007b05", + border_left_width: Optional[int] = 0, +) -> None: + stat_html = f""" +
    <div class="stat">
+        <p class="stat-label">
+            {label}
+        </p>
+        <p class="stat-value">
+            {value}
+        </p>
+    </div>
+ """ + mode = st.session_state.get("color_mode", "dark") + if mode == "light": + background_color = "#FEFEFE" + border_left_color = "#333333" + + styles = """ + .stat { + display: %s; + width: %s; + border-radius: %dpx; + border: 1px solid %s; + background-color: %s; + padding: 2%% 2%% 1%% 5%%; + margin-bottom: 2%%; + } + .stat-label { + font-size: 14px; + margin-bottom: 5px; + } + .stat-value { + font-size: 32px; + margin-bottom: 0; + } + %s + """ % (display, width, border_radius, border_color, background_color, "") + + if border_left_width > 1: + styles += """ + .stat { + border-left: %dpx solid %s !important; + } + """ % (border_left_width, border_left_color) + + st.markdown( + stat_html + f"", + unsafe_allow_html=True, + ) diff --git a/dlt/helpers/streamlit_app/widgets/summary.py b/dlt/helpers/streamlit_app/widgets/summary.py new file mode 100644 index 0000000000..afbefbe608 --- /dev/null +++ b/dlt/helpers/streamlit_app/widgets/summary.py @@ -0,0 +1,21 @@ +import dlt +import streamlit as st +from dlt.pipeline.exceptions import SqlClientNotAvailable + + +def pipeline_summary(pipeline: dlt.Pipeline) -> None: + try: + credentials = pipeline.sql_client().credentials + except SqlClientNotAvailable: + credentials = "---" + st.error("🚨 Cannot load data - SqlClient not available") + + schema_names = ", ".join(sorted(pipeline.schema_names)) + st.subheader("Pipeline info", divider=True) + st.markdown(f""" + * pipeline name: **{pipeline.pipeline_name}** + * destination: **{str(credentials)}** in **{pipeline.destination.destination_description}** + * dataset name: **{pipeline.dataset_name}** + * default schema name: **{pipeline.default_schema_name}** + * all schema names: **{schema_names}** + """) diff --git a/dlt/helpers/streamlit_app/widgets/tags.py b/dlt/helpers/streamlit_app/widgets/tags.py new file mode 100644 index 0000000000..a591e50efe --- /dev/null +++ b/dlt/helpers/streamlit_app/widgets/tags.py @@ -0,0 +1,41 @@ +from typing import Optional, Literal + +import streamlit as st + +TagType = Literal["info", "success", "warning", "error", "mute"] + + +def tag( + tag_name: str, + label: Optional[str] = None, + border_radius: Optional[int] = 4, + bold: Optional[bool] = False, + tag_type: Optional[TagType] = "mute", +) -> None: + tag_html = f""" + {str(label)+": " if label else ""}{tag_name} + """ + kinds = { + "mute": {"text_color": "#495057", "bg_color": "#e9ecef"}, + "info": {"text_color": "#1864ab", "bg_color": "#4dabf7"}, + "success": {"text_color": "#2b8a3e", "bg_color": "#8ce99a"}, + "warning": {"text_color": "#d9480f", "bg_color": "#ffa94d"}, + "error": {"text_color": "#c92a2a", "bg_color": "#ffe3e3"}, + } + kind = kinds[tag_type] + bg_color = kind["bg_color"] + text_color = kind["text_color"] + + styles = """ + + """ % (border_radius, bg_color, text_color, "600" if bold else "normal") + + st.markdown(tag_html + styles, unsafe_allow_html=True) diff --git a/dlt/helpers/streamlit_helper.py b/dlt/helpers/streamlit_helper.py deleted file mode 100644 index f6b2f3a62f..0000000000 --- a/dlt/helpers/streamlit_helper.py +++ /dev/null @@ -1,386 +0,0 @@ -import sys -from typing import Dict, List, Iterator -import humanize - -from dlt.common import pendulum -from dlt.common.typing import AnyFun -from dlt.common.configuration.exceptions import ConfigFieldMissingException -from dlt.common.exceptions import MissingDependencyException -from dlt.common.destination.reference import WithStateSync -from dlt.common.utils import flatten_list_or_items - -from dlt.common.libs.pandas import pandas -from 
dlt.pipeline import Pipeline -from dlt.pipeline.exceptions import CannotRestorePipelineException, SqlClientNotAvailable -from dlt.pipeline.state_sync import load_state_from_destination - -try: - import streamlit as st - - # from streamlit import SECRETS_FILE_LOC, secrets -except ModuleNotFoundError: - raise MissingDependencyException( - "DLT Streamlit Helpers", - ["streamlit"], - "DLT Helpers for Streamlit should be run within a streamlit app.", - ) - - -# use right caching function to disable deprecation message -if hasattr(st, "cache_data"): - cache_data = st.cache_data -else: - cache_data = st.experimental_memo - -# def restore_pipeline() -> Pipeline: -# """Restores Pipeline instance and associated credentials from Streamlit secrets - -# Current implementation requires that pipeline working dir is available at the location saved in secrets. - -# Raises: -# PipelineBackupNotFound: Raised when pipeline backup is not available -# CannotRestorePipelineException: Raised when pipeline working dir is not found or invalid - -# Returns: -# Pipeline: Instance of pipeline with attached credentials -# """ -# if "dlt" not in secrets: -# raise PipelineException("You must backup pipeline to Streamlit first") -# dlt_cfg = secrets["dlt"] -# credentials = deepcopy(dict(dlt_cfg["destination"])) -# if "default_schema_name" in credentials: -# del credentials["default_schema_name"] -# credentials.update(dlt_cfg["credentials"]) -# pipeline = Pipeline(dlt_cfg["pipeline_name"]) -# pipeline.restore_pipeline(credentials_from_dict(credentials), dlt_cfg["working_dir"]) -# return pipeline - - -# def backup_pipeline(pipeline: Pipeline) -> None: -# """Backups pipeline state to the `secrets.toml` of the Streamlit app. - -# Pipeline credentials and working directory will be added to the Streamlit `secrets` file. This allows to access query the data loaded to the destination and -# access definitions of the inferred schemas. See `restore_pipeline` and `write_data_explorer_page` functions in the same module. - -# Args: -# pipeline (Pipeline): Pipeline instance, typically restored with `restore_pipeline` -# """ -# # save pipeline state to project .config -# # config_file_name = file_util.get_project_streamlit_file_path("config.toml") - -# # save credentials to secrets -# if os.path.isfile(SECRETS_FILE_LOC): -# with open(SECRETS_FILE_LOC, "r", encoding="utf-8") as f: -# # use whitespace preserving parser -# secrets_ = tomlkit.load(f) -# else: -# secrets_ = tomlkit.document() - -# # save general settings -# secrets_["dlt"] = { -# "working_dir": pipeline.working_dir, -# "pipeline_name": pipeline.pipeline_name -# } - -# # get client config -# # TODO: pipeline api v2 should provide a direct method to get configurations -# CONFIG: BaseConfiguration = pipeline._loader_instance.load_client_cls.CONFIG # type: ignore -# CREDENTIALS: CredentialsConfiguration = pipeline._loader_instance.load_client_cls.CREDENTIALS # type: ignore - -# # save client config -# # print(dict_remove_nones_in_place(CONFIG.as_dict(lowercase=False))) -# dlt_c = cast(TomlContainer, secrets_["dlt"]) -# dlt_c["destination"] = dict_remove_nones_in_place(dict(CONFIG)) -# dlt_c["credentials"] = dict_remove_nones_in_place(dict(CREDENTIALS)) - -# with open(SECRETS_FILE_LOC, "w", encoding="utf-8") as f: -# # use whitespace preserving parser -# tomlkit.dump(secrets_, f) - - -def write_load_status_page(pipeline: Pipeline) -> None: - """Display pipeline loading information. 
Will be moved to dlt package once tested""" - - @cache_data(ttl=600) - def _query_data(query: str, schema_name: str = None) -> pandas.DataFrame: - try: - with pipeline.sql_client(schema_name) as client: - with client.execute_query(query) as curr: - return curr.df() - except SqlClientNotAvailable: - st.error("Cannot load data - SqlClient not available") - - @cache_data(ttl=5) - def _query_data_live(query: str, schema_name: str = None) -> pandas.DataFrame: - try: - with pipeline.sql_client(schema_name) as client: - with client.execute_query(query) as curr: - return curr.df() - except SqlClientNotAvailable: - st.error("Cannot load data - SqlClient not available") - - try: - st.header("Pipeline info") - credentials = pipeline.sql_client().credentials - schema_names = ", ".join(sorted(pipeline.schema_names)) - st.markdown(f""" - * pipeline name: **{pipeline.pipeline_name}** - * destination: **{str(credentials)}** in **{pipeline.destination.destination_description}** - * dataset name: **{pipeline.dataset_name}** - * default schema name: **{pipeline.default_schema_name}** - * all schema names: **{schema_names}** - """) - - st.header("Last load info") - col1, col2, col3 = st.columns(3) - loads_df = _query_data_live( - f"SELECT load_id, inserted_at FROM {pipeline.default_schema.loads_table_name} WHERE" - " status = 0 ORDER BY inserted_at DESC LIMIT 101 " - ) - loads_no = loads_df.shape[0] - if loads_df.shape[0] > 0: - rel_time = ( - humanize.naturaldelta( - pendulum.now() - pendulum.from_timestamp(loads_df.iloc[0, 1].timestamp()) - ) - + " ago" - ) - last_load_id = loads_df.iloc[0, 0] - if loads_no > 100: - loads_no = "> " + str(loads_no) - else: - rel_time = "---" - last_load_id = "---" - col1.metric("Last load time", rel_time) - col2.metric("Last load id", last_load_id) - col3.metric("Total number of loads", loads_no) - - st.markdown("**Number of loaded rows:**") - selected_load_id = st.selectbox("Select load id", loads_df) - schema = pipeline.default_schema - - # construct a union query - query_parts = [] - for table in schema.data_tables(): - if "parent" in table: - continue - table_name = table["name"] - query_parts.append( - f"SELECT '{table_name}' as table_name, COUNT(1) As rows_count FROM" - f" {table_name} WHERE _dlt_load_id = '{selected_load_id}'" - ) - query_parts.append("UNION ALL") - query_parts.pop() - rows_counts_df = _query_data("\n".join(query_parts)) - - st.markdown(f"Rows loaded in **{selected_load_id}**") - st.dataframe(rows_counts_df) - - st.markdown("**Last 100 loads**") - st.dataframe(loads_df) - - st.header("Schema updates") - schemas_df = _query_data_live( - "SELECT schema_name, inserted_at, version, version_hash FROM" - f" {pipeline.default_schema.version_table_name} ORDER BY inserted_at DESC LIMIT 101 " - ) - st.markdown("**100 recent schema updates**") - st.dataframe(schemas_df) - - st.header("Pipeline state info") - with pipeline.destination_client() as client: - if isinstance(client, WithStateSync): - remote_state = load_state_from_destination(pipeline.pipeline_name, client) - local_state = pipeline.state - - col1, col2 = st.columns(2) - if remote_state: - remote_state_version = remote_state["_state_version"] - else: - remote_state_version = "---" # type: ignore - - col1.metric("Local state version", local_state["_state_version"]) - col2.metric("Remote state version", remote_state_version) - - if remote_state_version != local_state["_state_version"]: - st.warning( - "Looks like that local state is not yet synchronized or synchronization is disabled" - ) - - except 
CannotRestorePipelineException as restore_ex: - st.error("Seems like the pipeline does not exist. Did you run it at least once?") - st.exception(restore_ex) - - except ConfigFieldMissingException as cf_ex: - st.error( - "Pipeline credentials/configuration is missing. This most often happen when you run the" - " streamlit app from different folder than the `.dlt` with `toml` files resides." - ) - st.text(str(cf_ex)) - - except Exception as ex: - st.error("Pipeline info could not be prepared. Did you load the data at least once?") - st.exception(ex) - - -def write_data_explorer_page( - pipeline: Pipeline, - schema_name: str = None, - show_dlt_tables: bool = False, - example_query: str = "", - show_charts: bool = True, -) -> None: - """Writes Streamlit app page with a schema and live data preview. - - #### Args: - pipeline (Pipeline): Pipeline instance to use. - schema_name (str, optional): Name of the schema to display. If None, default schema is used. - show_dlt_tables (bool, optional): Should show dlt internal tables. Defaults to False. - example_query (str, optional): Example query to be displayed in the SQL Query box. - show_charts (bool, optional): Should automatically show charts for the queries from SQL Query box. Defaults to True. - - Raises: - MissingDependencyException: Raised when a particular python dependency is not installed - """ - - @cache_data(ttl=60) - def _query_data(query: str, chunk_size: int = None) -> pandas.DataFrame: - try: - with pipeline.sql_client(schema_name) as client: - with client.execute_query(query) as curr: - return curr.df(chunk_size=chunk_size) - except SqlClientNotAvailable: - st.error("Cannot load data - SqlClient not available") - - st.header("Schemas and their tables") - - num_schemas = len(pipeline.schema_names) - if num_schemas == 1: - schema_name = pipeline.schema_names[0] - selected_schema = pipeline.schemas.get(schema_name) - st.subheader(f"Schema: {schema_name}") - elif num_schemas > 1: - st.subheader("Schema:") - text = "Pick a schema name to see all its tables below" - selected_schema_name = st.selectbox(text, sorted(pipeline.schema_names)) - selected_schema = pipeline.schemas.get(selected_schema_name) - - for table in sorted(selected_schema.data_tables(), key=lambda table: table["name"]): - table_name = table["name"] - st.subheader(f"Table: {table_name}") - if "description" in table: - st.text(table["description"]) - table_hints: List[str] = [] - if "parent" in table: - table_hints.append("parent: **%s**" % table["parent"]) - if "resource" in table: - table_hints.append("resource: **%s**" % table["resource"]) - if "write_disposition" in table: - table_hints.append("write disposition: **%s**" % table["write_disposition"]) - columns = table["columns"] - primary_keys: Iterator[str] = flatten_list_or_items( - [ - col_name - for col_name in columns.keys() - if not col_name.startswith("_") and not columns[col_name].get("primary_key") is None - ] - ) - table_hints.append("primary key(s): **%s**" % ", ".join(primary_keys)) - merge_keys = flatten_list_or_items( - [ - col_name - for col_name in columns.keys() - if not col_name.startswith("_") and not columns[col_name].get("merge_key") is None - ] - ) - table_hints.append("merge key(s): **%s**" % ", ".join(merge_keys)) - - st.markdown(" | ".join(table_hints)) - - # table schema contains various hints (like clustering or partition options) that we do not want to show in basic view - essentials_f = lambda c: { - k: v for k, v in c.items() if k in ["name", "data_type", "nullable"] - } - - 
st.table(map(essentials_f, table["columns"].values())) - # add a button that when pressed will show the full content of a table - if st.button("SHOW DATA", key=table_name): - df = _query_data(f"SELECT * FROM {table_name}", chunk_size=2048) - if df is None: - st.text("No rows returned") - else: - rows_count = df.shape[0] - if df.shape[0] < 2048: - st.text(f"All {rows_count} row(s)") - else: - st.text(f"Top {rows_count} row(s)") - st.dataframe(df) - - st.header("Run your query") - sql_query = st.text_area("Enter your SQL query", value=example_query) - if st.button("Run Query"): - if sql_query: - try: - # run the query from the text area - df = _query_data(sql_query) - if df is None: - st.text("No rows returned") - else: - rows_count = df.shape[0] - st.text(f"{rows_count} row(s) returned") - st.dataframe(df) - try: - # now if the dataset has supported shape try to display the bar or altair chart - if df.dtypes.shape[0] == 1 and show_charts: - # try barchart - st.bar_chart(df) - if df.dtypes.shape[0] == 2 and show_charts: - # try to import altair charts - try: - import altair as alt - except ModuleNotFoundError: - raise MissingDependencyException( - "DLT Streamlit Helpers", - ["altair"], - "DLT Helpers for Streamlit should be run within a streamlit" - " app.", - ) - - # try altair - bar_chart = ( - alt.Chart(df) - .mark_bar() - .encode( - x=f"{df.columns[1]}:Q", y=alt.Y(f"{df.columns[0]}:N", sort="-x") - ) - ) - st.altair_chart(bar_chart, use_container_width=True) - except Exception as ex: - st.error(f"Chart failed due to: {ex}") - except Exception as ex: - st.text("Exception when running query") - st.exception(ex) - - -def display(pipeline_name: str) -> None: - import dlt - - pipeline = dlt.attach(pipeline_name) - - pages: Dict[str, AnyFun] = { - "Explore data": write_data_explorer_page, - "Load info": write_load_status_page, - } - - st.title(f"Show {pipeline_name} pipeline") - - st.sidebar.title("Navigation") - selection = st.sidebar.radio("Go to", list(pages.keys())) - page = pages[selection] - - with st.spinner("Loading Page ..."): - page(pipeline) - - -if __name__ == "__main__": - display(sys.argv[1]) diff --git a/dlt/load/load.py b/dlt/load/load.py index 050e7bce67..23c3dea820 100644 --- a/dlt/load/load.py +++ b/dlt/load/load.py @@ -7,10 +7,17 @@ from dlt.common import sleep, logger from dlt.common.configuration import with_config, known_sections +from dlt.common.configuration.resolve import inject_section from dlt.common.configuration.accessors import config -from dlt.common.pipeline import LoadInfo, LoadMetrics, SupportsPipeline, WithStepInfo -from dlt.common.schema.utils import get_top_level_table +from dlt.common.pipeline import ( + LoadInfo, + LoadMetrics, + SupportsPipeline, + WithStepInfo, +) +from dlt.common.schema.utils import get_child_tables, get_top_level_table from dlt.common.storages.load_storage import LoadPackageInfo, ParsedLoadJobFileName, TJobState +from dlt.common.storages.load_package import LoadPackageStateInjectableContext from dlt.common.runners import TRunMetrics, Runnable, workermethod, NullExecutor from dlt.common.runtime.collector import Collector, NULL_COLLECTOR from dlt.common.runtime.logger import pretty_format_exception @@ -19,7 +26,10 @@ DestinationTerminalException, DestinationTransientException, ) +from dlt.common.configuration.container import Container + from dlt.common.schema import Schema, TSchemaTables + from dlt.common.storages import LoadStorage from dlt.common.destination.reference import ( DestinationClientDwhConfiguration, @@ -34,6 +44,7 @@ 
SupportsStagingDestination, TDestination, ) +from dlt.common.configuration.specs.config_section_context import ConfigSectionContext from dlt.destinations.job_impl import EmptyLoadJob @@ -414,7 +425,7 @@ def load_single_package(self, load_id: str, schema: Schema) -> None: failed_job.job_file_info.job_id(), failed_job.failed_message, ) - # possibly raise on too many retires + # possibly raise on too many retries if self.config.raise_on_max_retries: for new_job in package_info.jobs["new_jobs"]: r_c = new_job.job_file_info.retry_count @@ -452,12 +463,19 @@ def run(self, pool: Optional[Executor]) -> TRunMetrics: schema = self.load_storage.normalized_packages.load_schema(load_id) logger.info(f"Loaded schema name {schema.name} and version {schema.stored_version}") + container = Container() # get top load id and mark as being processed with self.collector(f"Load {schema.name} in {load_id}"): - # the same load id may be processed across multiple runs - if not self.current_load_id: - self._step_info_start_load_id(load_id) - self.load_single_package(load_id, schema) + with container.injectable_context( + LoadPackageStateInjectableContext( + storage=self.load_storage.normalized_packages, + load_id=load_id, + ) + ): + # the same load id may be processed across multiple runs + if not self.current_load_id: + self._step_info_start_load_id(load_id) + self.load_single_package(load_id, schema) return TRunMetrics(False, len(self.load_storage.list_normalized_packages())) diff --git a/dlt/normalize/items_normalizers.py b/dlt/normalize/items_normalizers.py index 56d38a5a64..8565a5d2b2 100644 --- a/dlt/normalize/items_normalizers.py +++ b/dlt/normalize/items_normalizers.py @@ -74,7 +74,7 @@ def _filter_columns( return row def _normalize_chunk( - self, root_table_name: str, items: List[TDataItem], may_have_pua: bool + self, root_table_name: str, items: List[TDataItem], may_have_pua: bool, skip_write: bool ) -> TSchemaUpdate: column_schemas = self._column_schemas schema_update: TSchemaUpdate = {} @@ -172,9 +172,11 @@ def _normalize_chunk( # store row # TODO: store all rows for particular items all together after item is fully completed # will be useful if we implement bad data sending to a table - self.load_storage.write_data_item( - self.load_id, schema_name, table_name, row, columns - ) + # we skip write when discovering schema for empty file + if not skip_write: + self.load_storage.write_data_item( + self.load_id, schema_name, table_name, row, columns + ) except StopIteration: pass signals.raise_if_signalled() @@ -193,22 +195,31 @@ def __call__( line: bytes = None for line_no, line in enumerate(f): items: List[TDataItem] = json.loadb(line) - partial_update = self._normalize_chunk(root_table_name, items, may_have_pua(line)) + partial_update = self._normalize_chunk( + root_table_name, items, may_have_pua(line), skip_write=False + ) schema_updates.append(partial_update) logger.debug(f"Processed {line_no} lines from file {extracted_items_file}") if line is None and root_table_name in self.schema.tables: - # write only if table seen data before + # TODO: we should push the truncate jobs via package state + # not as empty jobs. empty jobs should be reserved for + # materializing schemas and other edge cases ie. 
empty parquet files root_table = self.schema.tables[root_table_name] - if has_table_seen_data(root_table): - self.load_storage.write_empty_items_file( - self.load_id, - self.schema.name, - root_table_name, - self.schema.get_table_columns(root_table_name), - ) - logger.debug( - f"No lines in file {extracted_items_file}, written empty load job file" + if not has_table_seen_data(root_table): + # if this is a new table, add normalizer columns + partial_update = self._normalize_chunk( + root_table_name, [{}], False, skip_write=True ) + schema_updates.append(partial_update) + self.load_storage.write_empty_items_file( + self.load_id, + self.schema.name, + root_table_name, + self.schema.get_table_columns(root_table_name), + ) + logger.debug( + f"No lines in file {extracted_items_file}, written empty load job file" + ) return schema_updates diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index c5762af680..18f8faaa25 100644 --- a/dlt/normalize/normalize.py +++ b/dlt/normalize/normalize.py @@ -304,7 +304,7 @@ def spool_files( # drop evolve once for all tables that seen data x_normalizer.pop("evolve-columns-once", None) # mark that table have seen data only if there was data - if table_metrics[table_name].items_count > 0 and "seen-data" not in x_normalizer: + if "seen-data" not in x_normalizer: logger.info( f"Table {table_name} has seen data for a first time with load id {load_id}" ) diff --git a/dlt/pipeline/current.py b/dlt/pipeline/current.py index 7fdc0f095c..25fd398623 100644 --- a/dlt/pipeline/current.py +++ b/dlt/pipeline/current.py @@ -2,6 +2,13 @@ from dlt.common.pipeline import source_state as _state, resource_state, get_current_pipe_name from dlt.pipeline import pipeline as _pipeline +from dlt.extract.decorators import get_source_schema +from dlt.common.storages.load_package import ( + load_package, + commit_load_package_state, + destination_state, + clear_destination_state, +) from dlt.extract.decorators import get_source_schema, get_source pipeline = _pipeline diff --git a/dlt/pipeline/exceptions.py b/dlt/pipeline/exceptions.py index ac203d95a0..d3538a8377 100644 --- a/dlt/pipeline/exceptions.py +++ b/dlt/pipeline/exceptions.py @@ -90,7 +90,9 @@ def __init__( super().__init__( pipeline_name, f"No engine upgrade path for state in pipeline {pipeline_name} from {init_engine} to" - f" {to_engine}, stopped at {from_engine}", + f" {to_engine}, stopped at {from_engine}. 
You possibly tried to run an older dlt" + " version against a destination you have previously loaded data to with a newer dlt" + " version.", ) diff --git a/dlt/pipeline/mark.py b/dlt/pipeline/mark.py index 0aba0e19ae..3956d9bbe2 100644 --- a/dlt/pipeline/mark.py +++ b/dlt/pipeline/mark.py @@ -1,2 +1,7 @@ """Module with mark functions that make data to be specially processed""" -from dlt.extract import with_table_name, with_hints, make_hints +from dlt.extract import ( + with_table_name, + with_hints, + make_hints, + materialize_schema_item as materialize_table_schema, +) diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 185a11962a..d1d558b3b8 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -47,7 +47,7 @@ ) from dlt.common.schema.utils import normalize_schema_name from dlt.common.storages.exceptions import LoadPackageNotFound -from dlt.common.typing import DictStrStr, TFun, TSecretValue, is_optional_type +from dlt.common.typing import DictStrAny, TFun, TSecretValue, is_optional_type from dlt.common.runners import pool_runner as runner from dlt.common.storages import ( LiveSchemaStorage, @@ -126,15 +126,17 @@ ) from dlt.pipeline.typing import TPipelineStep from dlt.pipeline.state_sync import ( - STATE_ENGINE_VERSION, - bump_version_if_modified, - load_state_from_destination, - migrate_state, + PIPELINE_STATE_ENGINE_VERSION, + bump_pipeline_state_version_if_modified, + load_pipeline_state_from_destination, + migrate_pipeline_state, state_resource, json_encode_state, json_decode_state, + default_pipeline_state, ) from dlt.pipeline.warnings import credentials_argument_deprecated +from dlt.common.storages.load_package import TLoadPackageState def with_state_sync(may_extract_state: bool = False) -> Callable[[TFun], TFun]: @@ -143,6 +145,7 @@ def decorator(f: TFun) -> TFun: def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: # activate pipeline so right state is always provided self.activate() + # backup and restore state should_extract_state = may_extract_state and self.config.restore_from_destination with self.managed_state(extract_state=should_extract_state) as state: @@ -263,7 +266,14 @@ class Pipeline(SupportsPipeline): STATE_FILE: ClassVar[str] = "state.json" STATE_PROPS: ClassVar[List[str]] = list( set(get_type_hints(TPipelineState).keys()) - - {"sources", "destination_type", "destination_name", "staging_type", "staging_name"} + - { + "sources", + "destination_type", + "destination_name", + "staging_type", + "staging_name", + "destinations", + } ) LOCAL_STATE_PROPS: ClassVar[List[str]] = list(get_type_hints(TPipelineLocalState).keys()) DEFAULT_DATASET_SUFFIX: ClassVar[str] = "_dataset" @@ -438,6 +448,7 @@ def normalize( """Normalizes the data prepared with `extract` method, infers the schema and creates load packages for the `load` method. 
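A note on the `dlt.pipeline.mark` change above: besides the existing helpers it now re-exports `materialize_schema_item` as `materialize_table_schema`. For orientation, a hedged sketch of how the mark helpers are typically used inside a resource (the resource and table names are invented):

```py
import dlt

@dlt.resource
def events():
    # route individual items to different tables from a single resource
    yield dlt.mark.with_table_name({"id": 1, "kind": "click"}, "click_events")
    yield dlt.mark.with_table_name({"id": 2, "kind": "view"}, "view_events")
```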
Requires `destination` to be known.""" if is_interactive(): workers = 1 + if loader_file_format and loader_file_format in INTERNAL_LOADER_FILE_FORMATS: raise ValueError(f"{loader_file_format} is one of internal dlt file formats.") # check if any schema is present, if not then no data was extracted @@ -446,6 +457,7 @@ def normalize( # make sure destination capabilities are available self._get_destination_capabilities() + # create default normalize config normalize_config = NormalizeConfiguration( workers=workers, @@ -745,7 +757,7 @@ def sync_destination( # write the state back self._props_to_state(state) - bump_version_if_modified(state) + bump_pipeline_state_version_if_modified(state) self._save_state(state) except Exception as ex: raise PipelineStepFailed(self, "sync", None, ex, None) from ex @@ -845,6 +857,10 @@ def get_load_package_info(self, load_id: str) -> LoadPackageInfo: except LoadPackageNotFound: return self._get_normalize_storage().extracted_packages.get_load_package_info(load_id) + def get_load_package_state(self, load_id: str) -> TLoadPackageState: + """Returns information on extracted/normalized/completed package with given load_id, all jobs and their statuses.""" + return self._get_load_storage().get_load_package_state(load_id) + def list_failed_jobs_in_package(self, load_id: str) -> Sequence[LoadJobInfo]: """List all failed jobs and associated error messages for a specified `load_id`""" return self._get_load_storage().get_load_package_info(load_id).jobs.get("failed_jobs", []) @@ -1365,16 +1381,15 @@ def _get_step_info(self, step: WithStepInfo[TStepMetrics, TStepInfo]) -> TStepIn def _get_state(self) -> TPipelineState: try: state = json_decode_state(self._pipeline_storage.load(Pipeline.STATE_FILE)) - return migrate_state( - self.pipeline_name, state, state["_state_engine_version"], STATE_ENGINE_VERSION + return migrate_pipeline_state( + self.pipeline_name, + state, + state["_state_engine_version"], + PIPELINE_STATE_ENGINE_VERSION, ) except FileNotFoundError: # do not set the state hash, this will happen on first merge - return { - "_state_version": 0, - "_state_engine_version": STATE_ENGINE_VERSION, - "_local": {"first_run": True}, - } + return default_pipeline_state() # state["_version_hash"] = generate_version_hash(state) # return state @@ -1404,7 +1419,7 @@ def _restore_state_from_destination(self) -> Optional[TPipelineState]: schema = Schema(schema_name) with self._get_destination_clients(schema)[0] as job_client: if isinstance(job_client, WithStateSync): - state = load_state_from_destination(self.pipeline_name, job_client) + state = load_pipeline_state_from_destination(self.pipeline_name, job_client) if state is None: logger.info( "The state was not found in the destination" @@ -1538,7 +1553,7 @@ def _bump_version_and_extract_state( Storage will be created on demand. In that case the extracted package will be immediately committed. 
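The new `get_load_package_state` accessor above complements `get_load_package_info`. A minimal usage sketch, assuming a local duckdb destination and that the package of the last run is still available in load storage; pipeline, dataset, and table names are invented:

```py
import dlt

pipeline = dlt.pipeline(pipeline_name="demo", destination="duckdb", dataset_name="demo_data")
load_info = pipeline.run([{"id": 1}], table_name="items")

# inspect the state attached to the package that was just loaded
load_id = load_info.loads_ids[0]
print(pipeline.get_load_package_state(load_id))
```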
""" - _, hash_, _ = bump_version_if_modified(self._props_to_state(state)) + _, hash_, _ = bump_pipeline_state_version_if_modified(self._props_to_state(state)) should_extract = hash_ != state["_local"].get("_last_extracted_hash") if should_extract and extract_state: data = state_resource(state) diff --git a/dlt/pipeline/state_sync.py b/dlt/pipeline/state_sync.py index fa3939969b..8c72a218a4 100644 --- a/dlt/pipeline/state_sync.py +++ b/dlt/pipeline/state_sync.py @@ -1,25 +1,28 @@ -import base64 import binascii from copy import copy -import hashlib -from typing import Tuple, cast +from typing import Tuple, cast, List import pendulum import dlt from dlt.common import json -from dlt.common.pipeline import TPipelineState from dlt.common.typing import DictStrAny from dlt.common.schema.typing import STATE_TABLE_NAME, TTableSchemaColumns from dlt.common.destination.reference import WithStateSync, Destination from dlt.common.utils import compressed_b64decode, compressed_b64encode +from dlt.common.versioned_state import ( + generate_state_version_hash, + bump_state_version_if_modified, + default_versioned_state, +) +from dlt.common.pipeline import TPipelineState from dlt.extract import DltResource -from dlt.pipeline.exceptions import PipelineStateEngineNoUpgradePathException +from dlt.pipeline.exceptions import ( + PipelineStateEngineNoUpgradePathException, +) - -# allows to upgrade state when restored with a new version of state logic/schema -STATE_ENGINE_VERSION = 4 +PIPELINE_STATE_ENGINE_VERSION = 4 # state table columns STATE_TABLE_COLUMNS: TTableSchemaColumns = { @@ -57,59 +60,15 @@ def decompress_state(state_str: str) -> DictStrAny: return json.typed_loadb(state_bytes) # type: ignore[no-any-return] -def generate_version_hash(state: TPipelineState) -> str: - # generates hash out of stored schema content, excluding hash itself, version and local state - state_copy = copy(state) - state_copy.pop("_state_version", None) - state_copy.pop("_state_engine_version", None) - state_copy.pop("_version_hash", None) - state_copy.pop("_local", None) - content = json.typed_dumpb(state_copy, sort_keys=True) - h = hashlib.sha3_256(content) - return base64.b64encode(h.digest()).decode("ascii") - +def generate_pipeline_state_version_hash(state: TPipelineState) -> str: + return generate_state_version_hash(state, exclude_attrs=["_local"]) -def bump_version_if_modified(state: TPipelineState) -> Tuple[int, str, str]: - """Bumps the `state` version and version hash if content modified, returns (new version, new hash, old hash) tuple""" - hash_ = generate_version_hash(state) - previous_hash = state.get("_version_hash") - if not previous_hash: - # if hash was not set, set it without bumping the version, that's initial schema - pass - elif hash_ != previous_hash: - state["_state_version"] += 1 - state["_version_hash"] = hash_ - return state["_state_version"], hash_, previous_hash +def bump_pipeline_state_version_if_modified(state: TPipelineState) -> Tuple[int, str, str]: + return bump_state_version_if_modified(state, exclude_attrs=["_local"]) -def state_resource(state: TPipelineState) -> DltResource: - state = copy(state) - state.pop("_local") - state_str = compress_state(state) - state_doc = { - "version": state["_state_version"], - "engine_version": state["_state_engine_version"], - "pipeline_name": state["pipeline_name"], - "state": state_str, - "created_at": pendulum.now(), - "version_hash": state["_version_hash"], - } - return dlt.resource( - [state_doc], name=STATE_TABLE_NAME, write_disposition="append", 
columns=STATE_TABLE_COLUMNS - ) - - -def load_state_from_destination(pipeline_name: str, client: WithStateSync) -> TPipelineState: - # NOTE: if dataset or table holding state does not exist, the sql_client will rise DestinationUndefinedEntity. caller must handle this - state = client.get_stored_state(pipeline_name) - if not state: - return None - s = decompress_state(state.state) - return migrate_state(pipeline_name, s, s["_state_engine_version"], STATE_ENGINE_VERSION) - - -def migrate_state( +def migrate_pipeline_state( pipeline_name: str, state: DictStrAny, from_engine: int, to_engine: int ) -> TPipelineState: if from_engine == to_engine: @@ -119,7 +78,7 @@ def migrate_state( from_engine = 2 if from_engine == 2 and to_engine > 2: # you may want to recompute hash - state["_version_hash"] = generate_version_hash(state) # type: ignore[arg-type] + state["_version_hash"] = generate_pipeline_state_version_hash(state) # type: ignore[arg-type] from_engine = 3 if from_engine == 3 and to_engine > 3: if state.get("destination"): @@ -139,3 +98,41 @@ def migrate_state( ) state["_state_engine_version"] = from_engine return cast(TPipelineState, state) + + +def state_resource(state: TPipelineState) -> DltResource: + state = copy(state) + state.pop("_local") + state_str = compress_state(state) + state_doc = { + "version": state["_state_version"], + "engine_version": state["_state_engine_version"], + "pipeline_name": state["pipeline_name"], + "state": state_str, + "created_at": pendulum.now(), + "version_hash": state["_version_hash"], + } + return dlt.resource( + [state_doc], name=STATE_TABLE_NAME, write_disposition="append", columns=STATE_TABLE_COLUMNS + ) + + +def load_pipeline_state_from_destination( + pipeline_name: str, client: WithStateSync +) -> TPipelineState: + # NOTE: if dataset or table holding state does not exist, the sql_client will rise DestinationUndefinedEntity. 
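The helpers above delegate hashing and version bumping to `dlt.common.versioned_state`, excluding the volatile `_local` section from the hash. Conceptually, the decision whether pipeline state gets re-extracted boils down to comparing such a content hash against the hash recorded at the last extraction. A rough, self-contained sketch (the real implementation uses `json.typed_dumpb` and the shared versioned-state utilities):

```py
import base64
import hashlib
import json

def version_hash(state: dict) -> str:
    # hash the state content, ignoring version bookkeeping and the local section
    content = {
        k: v
        for k, v in state.items()
        if k not in ("_state_version", "_state_engine_version", "_version_hash", "_local")
    }
    digest = hashlib.sha3_256(json.dumps(content, sort_keys=True).encode()).digest()
    return base64.b64encode(digest).decode("ascii")

state = {
    "pipeline_name": "demo",
    "sources": {"github": {"last_value": 42}},
    "_state_version": 1,
    "_local": {},
}
hash_ = version_hash(state)
# state is only re-extracted when the hash differs from the last extracted one
should_extract = hash_ != state["_local"].get("_last_extracted_hash")
print(should_extract)  # True on the first run
```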
caller must handle this + state = client.get_stored_state(pipeline_name) + if not state: + return None + s = decompress_state(state.state) + return migrate_pipeline_state( + pipeline_name, s, s["_state_engine_version"], PIPELINE_STATE_ENGINE_VERSION + ) + + +def default_pipeline_state() -> TPipelineState: + return { + **default_versioned_state(), + "_state_engine_version": PIPELINE_STATE_ENGINE_VERSION, + "_local": {"first_run": True}, + } diff --git a/docs/examples/custom_destination_bigquery/.dlt/config.toml b/docs/examples/custom_destination_bigquery/.dlt/config.toml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/examples/custom_destination_bigquery/.dlt/example.secrets.toml b/docs/examples/custom_destination_bigquery/.dlt/example.secrets.toml new file mode 100644 index 0000000000..a1ed49b2b8 --- /dev/null +++ b/docs/examples/custom_destination_bigquery/.dlt/example.secrets.toml @@ -0,0 +1,9 @@ +# you can just paste services.json as credentials +[destination.bigquery.credentials] +client_email = "" +private_key = "" +project_id = "" +token_uri = "" +refresh_token = "" +client_id = "" +client_secret = "" \ No newline at end of file diff --git a/docs/examples/custom_destination_bigquery/__init__.py b/docs/examples/custom_destination_bigquery/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/examples/custom_destination_bigquery/custom_destination_bigquery.py b/docs/examples/custom_destination_bigquery/custom_destination_bigquery.py new file mode 100644 index 0000000000..624888f70a --- /dev/null +++ b/docs/examples/custom_destination_bigquery/custom_destination_bigquery.py @@ -0,0 +1,74 @@ +import dlt +import pandas as pd +import pyarrow as pa +from google.cloud import bigquery + +from dlt.common.configuration.specs import GcpServiceAccountCredentials + +# constants +OWID_DISASTERS_URL = ( + "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/" + "Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020)/" + "Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020).csv" +) +# this table needs to be manually created in your gc account +# format: "your-project.your_dataset.your_table" +BIGQUERY_TABLE_ID = "chat-analytics-rasa-ci.ci_streaming_insert.natural-disasters" + + +# dlt sources +@dlt.resource(name="natural_disasters") +def resource(url: str): + # load pyarrow table with pandas + table = pa.Table.from_pandas(pd.read_csv(url)) + # we add a list type column to demontrate bigquery lists + table = table.append_column( + "tags", + pa.array( + [["disasters", "earthquakes", "floods", "tsunamis"]] * len(table), + pa.list_(pa.string()), + ), + ) + # we add a struct type column to demonstrate bigquery structs + table = table.append_column( + "meta", + pa.array( + [{"loaded_by": "dlt"}] * len(table), + pa.struct([("loaded_by", pa.string())]), + ), + ) + yield table + + +# dlt biquery custom destination +# we can use the dlt provided credentials class +# to retrieve the gcp credentials from the secrets +@dlt.destination(name="bigquery", loader_file_format="parquet", batch_size=0) +def bigquery_insert( + items, table, credentials: GcpServiceAccountCredentials = dlt.secrets.value +) -> None: + client = bigquery.Client( + credentials.project_id, credentials.to_native_credentials(), location="US" + ) + job_config = bigquery.LoadJobConfig( + autodetect=True, + source_format=bigquery.SourceFormat.PARQUET, + schema_update_options=bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ) + # since we have set the 
batch_size to 0, we get a filepath and can load the file directly + with open(items, "rb") as f: + load_job = client.load_table_from_file(f, BIGQUERY_TABLE_ID, job_config=job_config) + load_job.result() # Waits for the job to complete. + + +if __name__ == "__main__": + # run the pipeline and print load results + pipeline = dlt.pipeline( + pipeline_name="csv_to_bigquery_insert", + destination=bigquery_insert, + dataset_name="mydata", + full_refresh=True, + ) + load_info = pipeline.run(resource(url=OWID_DISASTERS_URL)) + + print(load_info) diff --git a/docs/tools/.env.example b/docs/tools/.env.example new file mode 100644 index 0000000000..dd9d742228 --- /dev/null +++ b/docs/tools/.env.example @@ -0,0 +1 @@ +OPENAI_API_KEY="..." diff --git a/docs/tools/README.md b/docs/tools/README.md new file mode 100644 index 0000000000..28dfba2a44 --- /dev/null +++ b/docs/tools/README.md @@ -0,0 +1,62 @@ +# DLT docs tools + +This is a collection of useful tools to manage our docs. Some of these require additional dependencies not added +to our pyproject.toml in the root dir. To install these with pip, run: + +```sh +pip3 install -r requirements.txt +``` + +from this folder. + +## `check_embedded_snippets.py` +This script finds all embedded snippets in our docs, extracts them, and performs the following checks: + +* Snippet must have a valid language set, e.g. ```py +* Snippet must be parseable (works for py, toml, yaml and json snippets) +* Snippet must pass linting (works for py) +* Coming soon: snippet must pass type checking with mypy + +This script is run on CI to ensure code quality in our docs. + +### Usage + +```sh +# Run a full check on all snippets +python check_embedded_snippets.py full + +# Show all available commands and arguments for this script +python check_embedded_snippets.py --help + +# Only run the linting stage +python check_embedded_snippets.py lint + +# Run all stages but only for snippets in files that have the string "walkthrough" in the filepath +# you will probably be using this a lot when working on one doc page +python check_embedded_snippets.py full -f walkthrough + +# Run the parsing stage, but only on snippets 49, 345 and 789 +python check_embedded_snippets.py parse -s 49,345,789 + +# run all checks but with a bit more output to the terminal +python check_embedded_snippets.py full -v +``` + +### Snippet numbers +Each snippet will be assigned an index in the order it is encountered. This is useful during creation of new snippets in the docs to selectively run only a few snippets. These numbers will change as snippets are inserted into the docs. + +## `fix_grammar_gpt.py` +This script will run all (or selected) docs markdown files through the OpenAI API to correct grammar. You will need to place the OpenAI API key in an `.env` file in this or the root folder. See `.env.example`. We pay for each OpenAI API call, so be a bit considerate of your usage :). It is good to check the grammar on new pages.
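
Before running the grammar fixer for the first time, a minimal setup from this folder might look like this (copying the provided example file and pasting your own key into it):

```sh
# install the extra dependencies for the docs tools (if you have not already)
pip3 install -r requirements.txt
# create your local .env from the template, then put your OpenAI key into it
cp .env.example .env
```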
+ +### Usage + +```sh +# Fix all pages +python fix_grammar_gpt.py + +# Fix grammar for all files that have the string "walkthrough" in the filepath +python fix_grammar_gpt.py -f walkthrough + +# Fix grammar for the particular file +python fix_grammar_gpt.py -f ../website/docs/intro.md +``` diff --git a/docs/tools/__init__.py b/docs/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/tools/check_embedded_snippets.py b/docs/tools/check_embedded_snippets.py new file mode 100644 index 0000000000..da27c1aa19 --- /dev/null +++ b/docs/tools/check_embedded_snippets.py @@ -0,0 +1,304 @@ +""" +Walks through all markdown files, finds all code snippets, and checks wether they are parseable. +""" +from typing import List, Dict, Optional + +import os, ast, json, yaml, tomlkit, subprocess, argparse # noqa: I251 +from dataclasses import dataclass +from textwrap import dedent + +import dlt.cli.echo as fmt + +from utils import collect_markdown_files + + +SNIPPET_MARKER = "```" +ALLOWED_LANGUAGES = ["py", "toml", "json", "yaml", "text", "sh", "bat", "sql"] + +LINT_TEMPLATE = "./lint_setup/template.py" +LINT_FILE = "./lint_setup/lint_me.py" + +ENABLE_MYPY = False + + +@dataclass +class Snippet: + index: int + language: str + code: str + file: str + line: int + + def __str__(self) -> str: + return ( + f"Snippet No. {self.index} in {self.file} at line {self.line} with language" + f" {self.language}" + ) + + +def collect_snippets(markdown_files: List[str], verbose: bool) -> List[Snippet]: + """ + Extract all snippets from markdown files + """ + snippets: List[Snippet] = [] + index = 0 + for file in markdown_files: + # go line by line and find all code blocks + with open(file, "r", encoding="utf-8") as f: + current_snippet: Snippet = None + lint_count = 0 + for line in f.readlines(): + lint_count += 1 + if line.strip().startswith(SNIPPET_MARKER): + if current_snippet: + # process snippet + snippets.append(current_snippet) + current_snippet.code = dedent(current_snippet.code) + current_snippet = None + else: + # start new snippet + index += 1 + current_snippet = Snippet( + index=index, + language=line.strip().split(SNIPPET_MARKER)[1] or "unknown", + code="", + file=file, + line=lint_count, + ) + elif current_snippet: + current_snippet.code += line + assert not current_snippet, ( + "It seems that the last snippet in the file was not closed. Please check the file " + + file + ) + + fmt.note(f"Discovered {len(snippets)} snippets") + if verbose: + for lang in ALLOWED_LANGUAGES: + lang_count = len([s for s in snippets if s.language == lang]) + fmt.echo(f"Found {lang_count} snippets marked as {lang}") + if len(snippets) < 100: # sanity check + fmt.error("Found too few snippets. Something went wrong.") + exit(1) + return snippets + + +def filter_snippets(snippets: List[Snippet], files: str, snippet_numbers: str) -> List[Snippet]: + """ + Filter out snippets based on file or snippet number + """ + fmt.secho(fmt.bold("Filtering Snippets")) + filtered_snippets: List[Snippet] = [] + filtered_count = 0 + for snippet in snippets: + if files and (files not in snippet.file): + filtered_count += 1 + continue + elif snippet_numbers and (str(snippet.index) not in snippet_numbers): + filtered_count += 1 + continue + filtered_snippets.append(snippet) + if filtered_count: + fmt.note( + f"{filtered_count} Snippets skipped based on file and snippet number settings." + f" {len(filtered_snippets)} snippets remaining." 
+ ) + else: + fmt.note("0 Snippets skipped based on file and snippet number settings") + + if len(filtered_snippets) == 0: # sanity check + fmt.error("No snippets remaining after filter, nothing to do.") + exit(1) + return filtered_snippets + + +def check_language(snippets: List[Snippet]) -> None: + """ + Check if the language is allowed + """ + fmt.secho(fmt.bold("Checking snippets language settings")) + failed_count = 0 + for snippet in snippets: + if snippet.language not in ALLOWED_LANGUAGES: + fmt.warning(f"{str(snippet)} has an invalid language {snippet.language} setting.") + failed_count += 1 + + if failed_count: + fmt.error(f"""\ +Found {failed_count} snippets with invalid language settings. +* Please choose the correct language for your snippets: {ALLOWED_LANGUAGES}" +* All sh commands, except for windows (bat), should be marked as sh. +* All code blocks that are not a specific (markup-) language should be marked as text.\ +""") + exit(1) + else: + fmt.note("All snippets have valid language settings") + + +def parse_snippets(snippets: List[Snippet], verbose: bool) -> None: + """ + Parse all snippets with the respective parser library + """ + fmt.secho(fmt.bold("Parsing snippets")) + failed_count = 0 + for snippet in snippets: + # parse snippet by type + if verbose: + fmt.echo(f"Parsing {snippet}") + try: + if snippet.language == "py": + ast.parse(snippet.code) + elif snippet.language == "toml": + tomlkit.loads(snippet.code) + elif snippet.language == "json": + json.loads(snippet.code) + elif snippet.language == "yaml": + yaml.safe_load(snippet.code) + # ignore text and sh scripts + elif snippet.language in ["text", "sh", "bat", "sql"]: + pass + else: + raise ValueError(f"Unknown language {snippet.language}") + except Exception as exc: + fmt.warning(f"Failed to parse {str(snippet)}") + fmt.echo(exc) + failed_count += 1 + + if failed_count: + fmt.error(f"Failed to parse {failed_count} snippets") + exit(1) + else: + fmt.note("All snippets could be parsed") + + +def prepare_for_linting(snippet: Snippet) -> None: + """ + Prepare the lintme file with the snippet code and the template header + """ + with open(LINT_TEMPLATE, "r", encoding="utf-8") as f: + lint_template = f.read() + with open(LINT_FILE, "w", encoding="utf-8") as f: + f.write(lint_template) + f.write("# Snippet start\n\n") + f.write(snippet.code) + + +def lint_snippets(snippets: List[Snippet], verbose: bool) -> None: + """ + Lint all python snippets with ruff + """ + fmt.secho(fmt.bold("Linting Python snippets")) + failed_count = 0 + count = 0 + for snippet in snippets: + count += 1 + prepare_for_linting(snippet) + result = subprocess.run(["ruff", "check", LINT_FILE], capture_output=True, text=True) + if verbose: + fmt.echo(f"Linting {snippet} ({count} of {len(snippets)})") + if "error" in result.stdout.lower(): + failed_count += 1 + fmt.warning(f"Failed to lint {str(snippet)}") + fmt.echo(result.stdout.strip()) + + if failed_count: + fmt.error(f"Failed to lint {failed_count} snippets") + exit(1) + else: + fmt.note("All snippets could be linted") + + +def typecheck_snippets(snippets: List[Snippet], verbose: bool) -> None: + """ + TODO: Type check all python snippets with mypy + """ + fmt.secho(fmt.bold("Type checking Python snippets")) + failed_count = 0 + count = 0 + for snippet in snippets: + count += 1 + if verbose: + fmt.echo(f"Type checking {snippet} ({count} of {len(snippets)})") + prepare_for_linting(snippet) + result = subprocess.run(["mypy", LINT_FILE], capture_output=True, text=True) + if "no issues found" not in 
result.stdout.lower(): + failed_count += 1 + fmt.warning(f"Failed to type check {str(snippet)}") + fmt.echo(result.stdout.strip()) + + if failed_count: + fmt.error(f"Failed to type check {failed_count} snippets") + exit(1) + else: + fmt.note("All snippets passed type checking") + + +if __name__ == "__main__": + fmt.note( + "Welcome to Snippet Checker 3000, run 'python check_embedded_snippets.py --help' for help." + ) + + # setup cli + parser = argparse.ArgumentParser( + description=( + "Check embedded snippets. Discover, parse, lint, and type check all code snippets in" + " the docs." + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "command", + help=( + 'Which checks to run. "full" will run all checks, parse, lint or typecheck will only' + " run that specific step" + ), + choices=["full", "parse", "lint", "typecheck"], + default="full", + ) + parser.add_argument("-v", "--verbose", help="Increase output verbosity", action="store_true") + parser.add_argument( + "-f", + "--files", + help="Filter .md files to files containing this string in filename", + type=str, + ) + parser.add_argument( + "-s", + "--snippetnumbers", + help=( + "Filter checked snippets to snippetnumbers contained in this string, example:" + ' "13,412,345"' + ), + type=lambda i: i.split(","), + default=None, + ) + + args = parser.parse_args() + + fmt.secho(fmt.bold("Discovering snippets")) + + # find all markdown files and collect all snippets + markdown_files = collect_markdown_files(args.verbose) + snippets = collect_snippets(markdown_files, args.verbose) + + # check language settings + check_language(snippets) + + # filter snippets + filtered_snippets = filter_snippets(snippets, args.files, args.snippetnumbers) + + if args.command in ["parse", "full"]: + parse_snippets(filtered_snippets, args.verbose) + + # these stages are python only + python_snippets = [s for s in filtered_snippets if s.language == "py"] + if args.command in ["lint", "full"]: + lint_snippets(python_snippets, args.verbose) + if ENABLE_MYPY and args.command in ["typecheck", "full"]: + typecheck_snippets(python_snippets, args.verbose) + + # unlink lint_me file + if os.path.exists(LINT_FILE): + os.unlink(LINT_FILE) + + fmt.note("All selected checks passed. Snippet Checker 3000 signing off.") diff --git a/docs/tools/fix_grammar_gpt.py b/docs/tools/fix_grammar_gpt.py new file mode 100644 index 0000000000..1e4cf748dd --- /dev/null +++ b/docs/tools/fix_grammar_gpt.py @@ -0,0 +1,86 @@ +""" +Fixes the grammar of all the markdown files in the docs/website/docs directory. +Required openai package to be installed, and an .env file with the open ai api key to be present in the root directory: +OPENAI_API_KEY="..." +""" +import os +import argparse + +from openai import OpenAI +from dotenv import load_dotenv + +import dlt.cli.echo as fmt + +from utils import collect_markdown_files + +# constants +BASE_DIR = "../website/docs" +GPT_MODEL = "gpt-3.5-turbo-0125" + +SYSTEM_PROMPT = """\ +You are a grammar checker. Every message you get will be a document that is to be grammarchecked and returned as such. +You will not change the markdown syntax. You will only fix the grammar. You will not change the code snippets except for the comments therein. +You will not modify the header section which is enclosed by two occurences of "---". 
+Do not change the spelling or casing of these words: dlt, sdf, dbt +""" + +if __name__ == "__main__": + load_dotenv() + + fmt.note("Welcome to Grammar Fixer 3000, run 'python fix_grammar_gpt.py --help' for help.") + + # setup cli + parser = argparse.ArgumentParser( + description=( + "Fixes the grammar of our docs with open ai. Requires an .env file with the open ai" + " key." + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-v", "--verbose", help="Increase output verbosity", action="store_true") + parser.add_argument( + "-f", + "--files", + help="Specify the file name. Grammar Checker will filter all .md files containing this string in the filepath.", + type=str, + ) + + # get args + args = parser.parse_args() + + # find all files + markdown_files = collect_markdown_files(args.verbose) + + # filter files + if args.files: + markdown_files = [f for f in markdown_files if args.files in f] + + # run grammar check + count = 0 + for file_path in markdown_files: + count += 1 + + fmt.note(f"Fixing grammar for file {file_path} ({count} of {len(markdown_files)})") + + with open(file_path, "r", encoding="utf-8") as f: + doc = f.readlines() + + client = OpenAI() + response = client.chat.completions.create( + model=GPT_MODEL, + messages=[ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": "".join(doc)}, + ], + temperature=0, + ) + + fixed_doc = response.choices[0].message.content + + with open(file_path, "w", encoding="utf-8") as f: + f.writelines(fixed_doc) + + if count == 0: + fmt.warning("No files selected for grammar check.") + else: + fmt.note(f"Fixed grammar for {count} files.") diff --git a/docs/tools/lint_setup/.gitignore b/docs/tools/lint_setup/.gitignore new file mode 100644 index 0000000000..27479bdb04 --- /dev/null +++ b/docs/tools/lint_setup/.gitignore @@ -0,0 +1 @@ +lint_me.py \ No newline at end of file diff --git a/docs/tools/lint_setup/__init__.py b/docs/tools/lint_setup/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/tools/lint_setup/template.py b/docs/tools/lint_setup/template.py new file mode 100644 index 0000000000..dcfada63f6 --- /dev/null +++ b/docs/tools/lint_setup/template.py @@ -0,0 +1,35 @@ +# This section is imported before linting + +# mypy: disable-error-code="name-defined,import-not-found,import-untyped,empty-body,no-redef" + +# some universal imports +from typing import Optional, Dict, List, Any, Iterable, Iterator, Tuple, Sequence, Callable + +import os + +import pendulum +from pendulum import DateTime +from datetime import datetime # noqa: I251 + +import dlt +from dlt.common import json +from dlt.common.typing import TimedeltaSeconds, TAnyDateTime, TDataItem, TDataItems +from dlt.common.schema.typing import TTableSchema, TTableSchemaColumns + +from dlt.common.pipeline import LoadInfo +from dlt.sources.helpers import requests +from dlt.extract import DltResource, DltSource +from dlt.common.configuration.specs import ( + GcpServiceAccountCredentials, + ConnectionStringCredentials, + OAuth2Credentials, + BaseConfiguration, +) +from dlt.common.storages.configuration import FileSystemCredentials + +# some universal variables +pipeline: dlt.Pipeline = None # type: ignore[assignment] +p: dlt.Pipeline = None # type: ignore[assignment] +ex: Exception = None # type: ignore[assignment] +load_info: LoadInfo = None # type: ignore[assignment] +url: str = None # type: ignore[assignment] diff --git a/docs/tools/mypy.ini b/docs/tools/mypy.ini new file mode 100644 index 
0000000000..167ad5b30e --- /dev/null +++ b/docs/tools/mypy.ini @@ -0,0 +1,4 @@ +[mypy] +ignore_missing_imports = True +no_implicit_optional = False +strict_optional = False \ No newline at end of file diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt new file mode 100644 index 0000000000..48db2b38da --- /dev/null +++ b/docs/tools/requirements.txt @@ -0,0 +1,2 @@ +python-dotenv==1.0.1 +openai==1.14.2 diff --git a/docs/tools/ruff.toml b/docs/tools/ruff.toml new file mode 100644 index 0000000000..96f9432ecc --- /dev/null +++ b/docs/tools/ruff.toml @@ -0,0 +1,2 @@ +[lint] +ignore = ["F811", "F821", "F401", "F841", "E402"] diff --git a/docs/tools/utils.py b/docs/tools/utils.py new file mode 100644 index 0000000000..074b19b8e1 --- /dev/null +++ b/docs/tools/utils.py @@ -0,0 +1,32 @@ +from typing import List +import os + +import dlt.cli.echo as fmt + + +DOCS_DIR = "../website/docs" + + +def collect_markdown_files(verbose: bool) -> List[str]: + """ + Discovers all docs markdown files + """ + markdown_files: List[str] = [] + for path, _, files in os.walk(DOCS_DIR): + if "api_reference" in path: + continue + if "jaffle_shop" in path: + continue + for file in files: + if file.endswith(".md"): + markdown_files.append(os.path.join(path, file)) + if verbose: + fmt.echo(f"Discovered {os.path.join(path, file)}") + + if len(markdown_files) < 50: # sanity check + fmt.error("Found too few files. Something went wrong.") + exit(1) + + fmt.note(f"Discovered {len(markdown_files)} markdown files") + + return markdown_files diff --git a/docs/website/blog/2023-10-09-dlt-ops-startups.md b/docs/website/blog/2023-10-09-dlt-ops-startups.md index c48fd9ed95..94c1ff662b 100644 --- a/docs/website/blog/2023-10-09-dlt-ops-startups.md +++ b/docs/website/blog/2023-10-09-dlt-ops-startups.md @@ -112,7 +112,7 @@ Customize the INVOICE_QUERIES dictionary in the `unstructured_data/settings.py` And now the magic happens. Use the following command to run the pipeline: -```shell +```sh python unstructured_data_pipeline.py ``` diff --git a/docs/website/blog/2024-01-15-dlt-dbt-runner-on-cloud-functions.md b/docs/website/blog/2024-01-15-dlt-dbt-runner-on-cloud-functions.md index 227c466d37..b36748aed9 100644 --- a/docs/website/blog/2024-01-15-dlt-dbt-runner-on-cloud-functions.md +++ b/docs/website/blog/2024-01-15-dlt-dbt-runner-on-cloud-functions.md @@ -132,7 +132,7 @@ We recommend setting up and testing dbt-core locally before using it in cloud fu 1. Finally, you can deploy the function using gcloud CLI as: - ```shell + ```sh gcloud functions deploy YOUR_FUNCTION_NAME \ --gen2 \ --region=YOUR_REGION \ @@ -313,7 +313,7 @@ To integrate dlt and dbt in cloud functions, use the dlt-dbt runner; here’s ho 1. Finally, you can deploy the function using gcloud CLI as: - ```shell + ```sh gcloud functions deploy YOUR_FUNCTION_NAME \ --gen2 \ --region=YOUR_REGION \ diff --git a/docs/website/docs/build-a-pipeline-tutorial.md b/docs/website/docs/build-a-pipeline-tutorial.md index 90a175777f..1522761609 100644 --- a/docs/website/docs/build-a-pipeline-tutorial.md +++ b/docs/website/docs/build-a-pipeline-tutorial.md @@ -36,7 +36,7 @@ scalable extraction via micro-batching and parallelism. ## The simplest pipeline: 1 liner to load data with schema evolution -```python +```py import dlt dlt.pipeline(destination='duckdb', dataset_name='mydata').run([{'id': 1, 'name': 'John'}], table_name="users") @@ -52,7 +52,7 @@ named "three". With `dlt`, you can create a pipeline and run it with just a few 1. 
[Create a pipeline](walkthroughs/create-a-pipeline.md) to the [destination](dlt-ecosystem/destinations). 1. Give this pipeline data and [run it](walkthroughs/run-a-pipeline.md). -```python +```py import dlt pipeline = dlt.pipeline(destination="duckdb", dataset_name="country_data") @@ -84,7 +84,7 @@ In this example, we also run a dbt package and then load the outcomes of the loa This will enable us to log when schema changes occurred and match them to the loaded data for lineage, granting us both column and row level lineage. We also alert the schema change to a Slack channel where hopefully the producer and consumer are subscribed. -```python +```py import dlt # have data? dlt likes data @@ -105,7 +105,7 @@ load_info = pipeline.run( ) ``` Add dbt runner, optionally with venv: -```python +```py venv = dlt.dbt.get_venv(pipeline) dbt = dlt.dbt.package( pipeline, @@ -122,7 +122,7 @@ pipeline.run([models_info], table_name="transform_status", write_disposition='ap ``` Let's alert any schema changes: -```python +```py from dlt.common.runtime.slack import send_slack_message slack_hook = "https://hooks.slack.com/services/xxx/xxx/xxx" @@ -211,7 +211,7 @@ that only one instance of each event is present. You can use the merge write disposition as follows: -```python +```py @dlt.resource(primary_key="id", write_disposition="merge") def github_repo_events(): yield from _get_event_pages() @@ -260,7 +260,7 @@ into DAGs, providing cross-database compatibility and various features such as t backfills, testing, and troubleshooting. You can use the dbt runner in `dlt` to seamlessly integrate dbt into your pipeline. Here's an example of running a dbt package after loading the data: -```python +```py import dlt from pipedrive import pipedrive_source @@ -275,7 +275,7 @@ load_info = pipeline.run(pipedrive_source()) print(load_info) ``` Now transform from loaded data to dbt dataset: -```python +```py pipeline = dlt.pipeline( pipeline_name='pipedrive', destination='bigquery', @@ -306,7 +306,7 @@ transformations using SQL statements. You can execute SQL statements that change or manipulate data within tables. Here's an example of inserting a row into the `customers` table using the `dlt` SQL client: -```python +```py pipeline = dlt.pipeline(destination="bigquery", dataset_name="crm") with pipeline.sql_client() as client: @@ -324,7 +324,7 @@ You can fetch query results as Pandas data frames and perform transformations us functionalities. Here's an example of reading data from the `issues` table in DuckDB and counting reaction types using Pandas: -```python +```py pipeline = dlt.pipeline( pipeline_name="github_pipeline", destination="duckdb", diff --git a/docs/website/docs/dlt-ecosystem/destinations/athena.md b/docs/website/docs/dlt-ecosystem/destinations/athena.md index 9fc5dc15f9..26be75869b 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/athena.md +++ b/docs/website/docs/dlt-ecosystem/destinations/athena.md @@ -6,46 +6,45 @@ keywords: [aws, athena, glue catalog] # AWS Athena / Glue Catalog -The athena destination stores data as parquet files in s3 buckets and creates [external tables in aws athena](https://docs.aws.amazon.com/athena/latest/ug/creating-tables.html). You can then query those tables with athena sql commands which will then scan the whole folder of parquet files and return the results. This destination works very similar to other sql based destinations, with the exception of the merge write disposition not being supported at this time. 
dlt metadata will be stored in the same bucket as the parquet files, but as iceberg tables. Athena additionally supports writing individual data tables as iceberg tables, so the may be manipulated later, a common use-case would be to strip gdpr data from them. +The Athena destination stores data as Parquet files in S3 buckets and creates [external tables in AWS Athena](https://docs.aws.amazon.com/athena/latest/ug/creating-tables.html). You can then query those tables with Athena SQL commands, which will scan the entire folder of Parquet files and return the results. This destination works very similarly to other SQL-based destinations, with the exception that the merge write disposition is not supported at this time. The `dlt` metadata will be stored in the same bucket as the Parquet files, but as iceberg tables. Athena also supports writing individual data tables as Iceberg tables, so they may be manipulated later. A common use case would be to strip GDPR data from them. ## Install dlt with Athena **To install the DLT library with Athena dependencies:** -``` +```sh pip install dlt[athena] ``` ## Setup Guide ### 1. Initialize the dlt project -Let's start by initializing a new dlt project as follows: - ```bash +Let's start by initializing a new `dlt` project as follows: + ```sh dlt init chess athena ``` - > 💡 This command will initialise your pipeline with chess as the source and aws athena as the destination using the filesystem staging destination + > 💡 This command will initialize your pipeline with chess as the source and AWS Athena as the destination using the filesystem staging destination. -### 2. Setup bucket storage and athena credentials +### 2. Setup bucket storage and Athena credentials -First install dependencies by running: -``` +First, install dependencies by running: +```sh pip install -r requirements.txt ``` -or with `pip install dlt[athena]` which will install `s3fs`, `pyarrow`, `pyathena` and `botocore` packages. +or with `pip install dlt[athena]`, which will install `s3fs`, `pyarrow`, `pyathena`, and `botocore` packages. :::caution -You may also install the dependencies independently -try +You may also install the dependencies independently. Try ```sh pip install dlt pip install s3fs pip install pyarrow pip install pyathena ``` -so pip does not fail on backtracking +so pip does not fail on backtracking. ::: -To edit the `dlt` credentials file with your secret info, open `.dlt/secrets.toml`. You will need to provide a `bucket_url` which holds the uploaded parquet files, a `query_result_bucket` which athena uses to write query results too, and credentials that have write and read access to these two buckets as well as the full athena access aws role. +To edit the `dlt` credentials file with your secret info, open `.dlt/secrets.toml`. You will need to provide a `bucket_url`, which holds the uploaded parquet files, a `query_result_bucket`, which Athena uses to write query results to, and credentials that have write and read access to these two buckets as well as the full Athena access AWS role. The toml file looks like this: @@ -63,10 +62,10 @@ query_result_bucket="s3://[results_bucket_name]" # replace with your query resul [destination.athena.credentials] aws_access_key_id="please set me up!" # same as credentials for filesystem aws_secret_access_key="please set me up!" # same as credentials for filesystem -region_name="please set me up!" # set your aws region, for example "eu-central-1" for frankfurt +region_name="please set me up!" 
# set your AWS region, for example "eu-central-1" for Frankfurt ``` -if you have your credentials stored in `~/.aws/credentials` just remove the **[destination.filesystem.credentials]** and **[destination.athena.credentials]** section above and `dlt` will fall back to your **default** profile in local credentials. If you want to switch the profile, pass the profile name as follows (here: `dlt-ci-user`): +If you have your credentials stored in `~/.aws/credentials`, just remove the **[destination.filesystem.credentials]** and **[destination.athena.credentials]** section above and `dlt` will fall back to your **default** profile in local credentials. If you want to switch the profile, pass the profile name as follows (here: `dlt-ci-user`): ```toml [destination.filesystem.credentials] profile_name="dlt-ci-user" @@ -77,7 +76,7 @@ profile_name="dlt-ci-user" ## Additional Destination Configuration -You can provide an athena workgroup like so: +You can provide an Athena workgroup like so: ```toml [destination.athena] athena_work_group="my_workgroup" @@ -85,75 +84,70 @@ athena_work_group="my_workgroup" ## Write disposition -`athena` destination handles the write dispositions as follows: -- `append` - files belonging to such tables are added to dataset folder -- `replace` - all files that belong to such tables are deleted from dataset folder and then current set of files is added. -- `merge` - falls back to `append` +The `athena` destination handles the write dispositions as follows: +- `append` - files belonging to such tables are added to the dataset folder. +- `replace` - all files that belong to such tables are deleted from the dataset folder, and then the current set of files is added. +- `merge` - falls back to `append`. ## Data loading -Data loading happens by storing parquet files in an s3 bucket and defining a schema on athena. If you query data via SQL queries on athena, the returned data is read by -scanning your bucket and reading all relevant parquet files in there. +Data loading happens by storing parquet files in an S3 bucket and defining a schema on Athena. If you query data via SQL queries on Athena, the returned data is read by scanning your bucket and reading all relevant parquet files in there. `dlt` internal tables are saved as Iceberg tables. ### Data types -Athena tables store timestamps with millisecond precision and with that precision we generate parquet files. Mind that Iceberg tables have microsecond precision. +Athena tables store timestamps with millisecond precision, and with that precision, we generate parquet files. Keep in mind that Iceberg tables have microsecond precision. -Athena does not support JSON fields so JSON is stored as string. +Athena does not support JSON fields, so JSON is stored as a string. > ❗**Athena does not support TIME columns in parquet files**. `dlt` will fail such jobs permanently. Convert `datetime.time` objects to `str` or `datetime.datetime` to load them. ### Naming Convention -We follow our snake_case name convention. Mind the following: -* DDL use HIVE escaping with `````` +We follow our snake_case name convention. Keep the following in mind: +* DDL uses HIVE escaping with `````` * Other queries use PRESTO and regular SQL escaping. ## Staging support -Using a staging destination is mandatory when using the athena destination. If you do not set staging to `filesystem`, dlt will automatically do this for you. +Using a staging destination is mandatory when using the Athena destination. 
If you do not set staging to `filesystem`, `dlt` will automatically do this for you. If you decide to change the [filename layout](./filesystem#data-loading) from the default value, keep the following in mind so that Athena can reliably build your tables: - - You need to provide the `{table_name}` placeholder and this placeholder needs to be followed by a forward slash - - You need to provide the `{file_id}` placeholder and it needs to be somewhere after the `{table_name}` placeholder. - - {table_name} must be the first placeholder in the layout. + - You need to provide the `{table_name}` placeholder, and this placeholder needs to be followed by a forward slash. + - You need to provide the `{file_id}` placeholder, and it needs to be somewhere after the `{table_name}` placeholder. + - `{table_name}` must be the first placeholder in the layout. ## Additional destination options -### iceberg data tables -You can save your tables as iceberg tables to athena. This will enable you to for example delete data from them later if you need to. To switch a resouce to the iceberg table-format, -supply the table_format argument like this: +### Iceberg data tables +You can save your tables as Iceberg tables to Athena. This will enable you, for example, to delete data from them later if you need to. To switch a resource to the iceberg table format, supply the table_format argument like this: -```python +```py @dlt.resource(table_format="iceberg") def data() -> Iterable[TDataItem]: ... ``` -Alternatively you can set all tables to use the iceberg format with a config variable: +Alternatively, you can set all tables to use the iceberg format with a config variable: ```toml [destination.athena] force_iceberg = "True" ``` -For every table created as an iceberg table, the athena destination will create a regular athena table in the staging dataset of both the filesystem as well as the athena glue catalog and then -copy all data into the final iceberg table that lives with the non-iceberg tables in the same dataset on both filesystem and the glue catalog. Switching from iceberg to regular table or vice versa -is not supported. +For every table created as an iceberg table, the Athena destination will create a regular Athena table in the staging dataset of both the filesystem and the Athena glue catalog, and then copy all data into the final iceberg table that lives with the non-iceberg tables in the same dataset on both the filesystem and the glue catalog. Switching from iceberg to regular table or vice versa is not supported. ### dbt support -Athena is supported via `dbt-athena-community`. Credentials are passed into `aws_access_key_id` and `aws_secret_access_key` of generated dbt profile. Iceberg tables are supported but you need to make sure that you materialize your models as iceberg tables if your source table is iceberg. We encountered problems with materializing -date time columns due to different precision on iceberg (nanosecond) and regular Athena tables (millisecond). -The Athena adapter requires that you setup **region_name** in Athena configuration below. You can also setup table catalog name to change the default: **awsdatacatalog** +Athena is supported via `dbt-athena-community`. Credentials are passed into `aws_access_key_id` and `aws_secret_access_key` of the generated dbt profile. Iceberg tables are supported, but you need to make sure that you materialize your models as iceberg tables if your source table is iceberg. 
We encountered problems with materializing date time columns due to different precision on iceberg (nanosecond) and regular Athena tables (millisecond). +The Athena adapter requires that you set up **region_name** in the Athena configuration below. You can also set up the table catalog name to change the default: **awsdatacatalog** ```toml [destination.athena] aws_data_catalog="awsdatacatalog" ``` ### Syncing of `dlt` state -- This destination fully supports [dlt state sync.](../../general-usage/state#syncing-state-with-destination). The state is saved in athena iceberg tables in your s3 bucket. +- This destination fully supports [dlt state sync.](../../general-usage/state#syncing-state-with-destination). The state is saved in Athena iceberg tables in your S3 bucket. ## Supported file formats diff --git a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md index 25b01923b5..4144707b03 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md +++ b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md @@ -10,7 +10,7 @@ keywords: [bigquery, destination, data warehouse] **To install the DLT library with BigQuery dependencies:** -``` +```sh pip install dlt[bigquery] ``` @@ -18,17 +18,17 @@ pip install dlt[bigquery] **1. Initialize a project with a pipeline that loads to BigQuery by running:** -``` +```sh dlt init chess bigquery ``` **2. Install the necessary dependencies for BigQuery by running:** -``` +```sh pip install -r requirements.txt ``` -This will install dlt with **bigquery** extra, which contains all the dependencies required by the bigquery client. +This will install dlt with the `bigquery` extra, which contains all the dependencies required by the bigquery client. **3. Log in to or create a Google Cloud account** @@ -58,7 +58,7 @@ You don't need to grant users access to this service account now, so click the ` In the service accounts table page that you're redirected to after clicking `Done` as instructed above, select the three dots under the `Actions` column for the service account you created and select `Manage keys`. -This will take you to page where you can click the `Add key` button, then the `Create new key` button, +This will take you to a page where you can click the `Add key` button, then the `Create new key` button, and finally the `Create` button, keeping the preselected `JSON` option. A `JSON` file that includes your service account private key will then be downloaded. @@ -67,7 +67,7 @@ A `JSON` file that includes your service account private key will then be downlo Open your `dlt` credentials file: -``` +```sh open .dlt/secrets.toml ``` @@ -83,11 +83,11 @@ private_key = "private_key" # please set me up! client_email = "client_email" # please set me up! ``` -You can specify the location of the data i.e. `EU` instead of `US` which is a default. +You can specify the location of the data i.e. `EU` instead of `US` which is the default. ### OAuth 2.0 Authentication -You can use the OAuth 2.0 authentication. You'll need to generate a **refresh token** with right scopes (I suggest to ask our GPT-4 assistant for details). +You can use OAuth 2.0 authentication. You'll need to generate a **refresh token** with the right scopes (we suggest asking our GPT-4 assistant for details). Then you can fill the following information in `secrets.toml` ```toml @@ -103,9 +103,9 @@ refresh_token = "refresh_token" # please set me up! 
### Using Default Credentials -Google provides several ways to get default credentials i.e. from `GOOGLE_APPLICATION_CREDENTIALS` environment variable or metadata services. +Google provides several ways to get default credentials i.e. from the `GOOGLE_APPLICATION_CREDENTIALS` environment variable or metadata services. VMs available on GCP (cloud functions, Composer runners, Colab notebooks) have associated service accounts or authenticated users. -Will try to use default credentials if nothing is explicitly specified in the secrets. +`dlt` will try to use default credentials if nothing is explicitly specified in the secrets. ```toml [destination.bigquery] @@ -114,16 +114,16 @@ location = "US" ## Write Disposition -All write dispositions are supported +All write dispositions are supported. -If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized` the destination tables will be dropped and +If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized`, the destination tables will be dropped and recreated with a [clone command](https://cloud.google.com/bigquery/docs/table-clones-create) from the staging tables. ## Data Loading -`dlt` uses `BigQuery` load jobs that send files from local filesystem or gcs buckets. -Loader follows [Google recommendations](https://cloud.google.com/bigquery/docs/error-messages) when retrying and terminating jobs. -Google BigQuery client implements elaborate retry mechanism and timeouts for queries and file uploads, which may be configured in destination options. +`dlt` uses `BigQuery` load jobs that send files from the local filesystem or GCS buckets. +The loader follows [Google recommendations](https://cloud.google.com/bigquery/docs/error-messages) when retrying and terminating jobs. +The Google BigQuery client implements an elaborate retry mechanism and timeouts for queries and file uploads, which may be configured in destination options. ## Supported File Formats @@ -143,36 +143,36 @@ When staging is enabled: BigQuery supports the following [column hints](https://dlthub.com/docs/general-usage/schema#tables-and-columns): -* `partition` - creates a partition with a day granularity on decorated column (`PARTITION BY DATE`). - May be used with `datetime`, `date` and `bigint` data types. +* `partition` - creates a partition with a day granularity on the decorated column (`PARTITION BY DATE`). + May be used with `datetime`, `date`, and `bigint` data types. Only one column per table is supported and only when a new table is created. For more information on BigQuery partitioning, read the [official docs](https://cloud.google.com/bigquery/docs/partitioned-tables). > ❗ `bigint` maps to BigQuery's **INT64** data type. > Automatic partitioning requires converting an INT64 column to a UNIX timestamp, which `GENERATE_ARRAY` doesn't natively support. > With a 10,000 partition limit, we can’t cover the full INT64 range. - > Instead, we set 86,400 second boundaries to enable daily partitioning. + > Instead, we set 86,400-second boundaries to enable daily partitioning. > This captures typical values, but extremely large/small outliers go to an `__UNPARTITIONED__` catch-all partition. * `cluster` - creates a cluster column(s). Many columns per table are supported and only when a new table is created. ## Staging Support -BigQuery supports gcs as a file staging destination. dlt will upload files in the parquet format to gcs and ask BigQuery to copy their data directly into the db. 
-Please refer to the [Google Storage filesystem documentation](./filesystem.md#google-storage) to learn how to set up your gcs bucket with the bucket_url and credentials. -If you use the same service account for gcs and your redshift deployment, you do not need to provide additional authentication for BigQuery to be able to read from your bucket. +BigQuery supports GCS as a file staging destination. `dlt` will upload files in the parquet format to GCS and ask BigQuery to copy their data directly into the database. +Please refer to the [Google Storage filesystem documentation](./filesystem.md#google-storage) to learn how to set up your GCS bucket with the bucket_url and credentials. +If you use the same service account for GCS and your Redshift deployment, you do not need to provide additional authentication for BigQuery to be able to read from your bucket. -Alternatively to parquet files, you can specify jsonl as the staging file format. For this set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`. +Alternatively to parquet files, you can specify jsonl as the staging file format. For this, set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`. ### BigQuery/GCS Staging Example -```python +```py # Create a dlt pipeline that will load # chess player data to the BigQuery destination -# via a gcs bucket. +# via a GCS bucket. pipeline = dlt.pipeline( pipeline_name='chess_pipeline', - destination='biquery', + destination='bigquery', staging='filesystem', # Add this to activate the staging location. dataset_name='player_data' ) @@ -180,7 +180,7 @@ pipeline = dlt.pipeline( ## Additional Destination Options -You can configure the data location and various timeouts as shown below. This information is not a secret so can be placed in `config.toml` as well: +You can configure the data location and various timeouts as shown below. This information is not a secret so it can be placed in `config.toml` as well: ```toml [destination.bigquery] @@ -191,15 +191,15 @@ retry_deadline=60.0 ``` * `location` sets the [BigQuery data location](https://cloud.google.com/bigquery/docs/locations) (default: **US**) -* `http_timeout` sets the timeout when connecting and getting a response from BigQuery API (default: **15 seconds**) -* `file_upload_timeout` a timeout for file upload when loading local files: the total time of the upload may not exceed this value (default: **30 minutes**, set in seconds) -* `retry_deadline` a deadline for a [DEFAULT_RETRY used by Google](https://cloud.google.com/python/docs/reference/storage/1.39.0/retry_timeout) +* `http_timeout` sets the timeout when connecting and getting a response from the BigQuery API (default: **15 seconds**) +* `file_upload_timeout` is a timeout for file upload when loading local files: the total time of the upload may not exceed this value (default: **30 minutes**, set in seconds) +* `retry_deadline` is a deadline for a [DEFAULT_RETRY used by Google](https://cloud.google.com/python/docs/reference/storage/1.39.0/retry_timeout) ### dbt Support This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-bigquery](https://github.com/dbt-labs/dbt-bigquery). Credentials, if explicitly defined, are shared with `dbt` along with other settings like **location** and retries and timeouts. -In case of implicit credentials (i.e. available in cloud function), `dlt` shares the `project_id` and delegates obtaining credentials to `dbt` adapter. +In the case of implicit credentials (i.e. 
available in a cloud function), `dlt` shares the `project_id` and delegates obtaining credentials to the `dbt` adapter. ### Syncing of `dlt` State @@ -215,9 +215,9 @@ The adapter updates the DltResource with metadata about the destination column a ### Use an Adapter to Apply Hints to a Resource -Here is an example of how to use the `bigquery_adapter` method to apply hints to a resource on both column level and table level: +Here is an example of how to use the `bigquery_adapter` method to apply hints to a resource on both the column level and table level: -```python +```py from datetime import date, timedelta import dlt @@ -246,9 +246,9 @@ bigquery_adapter( bigquery_adapter(event_data, table_description="Dummy event data.") ``` -Above, the adapter specifies that `event_date` should be used for partitioning and both `event_date` and `user_id` should be used for clustering (in the given order) when the table is created. +In the example above, the adapter specifies that `event_date` should be used for partitioning and both `event_date` and `user_id` should be used for clustering (in the given order) when the table is created. -Some things to note with the adapter's behaviour: +Some things to note with the adapter's behavior: - You can only partition on one column (refer to [supported hints](#supported-column-hints)). - You can cluster on as many columns as you would like. @@ -258,7 +258,7 @@ Some things to note with the adapter's behaviour: Note that `bigquery_adapter` updates the resource *inplace*, but returns the resource for convenience, i.e. both the following are valid: -```python +```py bigquery_adapter(my_resource, partition="partition_column_name") my_resource = bigquery_adapter(my_resource, partition="partition_column_name") ``` diff --git a/docs/website/docs/dlt-ecosystem/destinations/databricks.md b/docs/website/docs/dlt-ecosystem/destinations/databricks.md index fc100e41e2..8078d2c64d 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/databricks.md +++ b/docs/website/docs/dlt-ecosystem/destinations/databricks.md @@ -7,11 +7,11 @@ keywords: [Databricks, destination, data warehouse] --- # Databricks -*Big thanks to Evan Phillips and [swishbi.com](https://swishbi.com/) for contributing code, time and test environment* +*Big thanks to Evan Phillips and [swishbi.com](https://swishbi.com/) for contributing code, time, and a test environment.* ## Install dlt with Databricks **To install the DLT library with Databricks dependencies:** -``` +```sh pip install dlt[databricks] ``` @@ -28,7 +28,7 @@ If you already have your Databricks workspace set up, you can skip to the [Loade 1. Create a Databricks workspace in Azure - In your Azure Portal search for Databricks and create a new workspace. In the "Pricing Tier" section, select "Premium" to be able to use the Unity Catalog. + In your Azure Portal, search for Databricks and create a new workspace. In the "Pricing Tier" section, select "Premium" to be able to use the Unity Catalog. 2. Create an ADLS Gen 2 storage account @@ -42,7 +42,7 @@ If you already have your Databricks workspace set up, you can skip to the [Loade 4. Create an Access Connector for Azure Databricks This will allow Databricks to access your storage account. - In the Azure Portal search for "Access Connector for Azure Databricks" and create a new connector. + In the Azure Portal, search for "Access Connector for Azure Databricks" and create a new connector. 5. 
Grant access to your storage container @@ -54,16 +54,16 @@ If you already have your Databricks workspace set up, you can skip to the [Loade 1. Now go to your Databricks workspace - To get there from the Azure Portal, search for "Databricks" and select your Databricks and click "Launch Workspace". + To get there from the Azure Portal, search for "Databricks", select your Databricks, and click "Launch Workspace". 2. In the top right corner, click on your email address and go to "Manage Account" 3. Go to "Data" and click on "Create Metastore" Name your metastore and select a region. - If you'd like to set up a storage container for the whole metastore you can add your ADLS URL and Access Connector Id here. You can also do this on a granular level when creating the catalog. + If you'd like to set up a storage container for the whole metastore, you can add your ADLS URL and Access Connector Id here. You can also do this on a granular level when creating the catalog. - In the next step assign your metastore to your workspace. + In the next step, assign your metastore to your workspace. 4. Go back to your workspace and click on "Catalog" in the left-hand menu @@ -77,7 +77,7 @@ If you already have your Databricks workspace set up, you can skip to the [Loade Set the URL of our storage container. This should be in the form: `abfss://@.dfs.core.windows.net/` - Once created you can test the connection to make sure the container is accessible from databricks. + Once created, you can test the connection to make sure the container is accessible from Databricks. 7. Now you can create a catalog @@ -91,12 +91,12 @@ If you already have your Databricks workspace set up, you can skip to the [Loade ## Loader setup Guide **1. Initialize a project with a pipeline that loads to Databricks by running** -``` +```sh dlt init chess databricks ``` **2. Install the necessary dependencies for Databricks by running** -``` +```sh pip install -r requirements.txt ``` This will install dlt with **databricks** extra which contains Databricks Python dbapi client. @@ -113,7 +113,7 @@ Example: [destination.databricks.credentials] server_hostname = "MY_DATABRICKS.azuredatabricks.net" http_path = "/sql/1.0/warehouses/12345" -access_token "MY_ACCESS_TOKEN" +access_token = "MY_ACCESS_TOKEN" catalog = "my_catalog" ``` @@ -123,7 +123,7 @@ All write dispositions are supported ## Data loading Data is loaded using `INSERT VALUES` statements by default. -Efficient loading from a staging filesystem is also supported by configuring an Amazon S3 or Azure Blob Storage bucket as a staging destination. When staging is enabled `dlt` will upload data in `parquet` files to the bucket and then use `COPY INTO` statements to ingest the data into Databricks. +Efficient loading from a staging filesystem is also supported by configuring an Amazon S3 or Azure Blob Storage bucket as a staging destination. When staging is enabled, `dlt` will upload data in `parquet` files to the bucket and then use `COPY INTO` statements to ingest the data into Databricks. For more information on staging, see the [staging support](#staging-support) section below. ## Supported file formats @@ -133,7 +133,7 @@ For more information on staging, see the [staging support](#staging-support) sec The `jsonl` format has some limitations when used with Databricks: -1. Compression must be disabled to load jsonl files in databricks. Set `data_writer.disable_compression` to `true` in dlt config when using this format. +1. Compression must be disabled to load jsonl files in Databricks. 
Set `data_writer.disable_compression` to `true` in dlt config when using this format. 2. The following data types are not supported when using `jsonl` format with `databricks`: `decimal`, `complex`, `date`, `binary`. Use `parquet` if your data contains these types. 3. `bigint` data type with precision is not supported with `jsonl` format @@ -144,16 +144,16 @@ Databricks supports both Amazon S3 and Azure Blob Storage as staging locations. ### Databricks and Amazon S3 -Please refer to the [S3 documentation](./filesystem.md#aws-s3) for details on connecting your s3 bucket with the bucket_url and credentials. +Please refer to the [S3 documentation](./filesystem.md#aws-s3) for details on connecting your S3 bucket with the bucket_url and credentials. -Example to set up Databricks with s3 as a staging destination: +Example to set up Databricks with S3 as a staging destination: -```python +```py import dlt # Create a dlt pipeline that will load # chess player data to the Databricks destination -# via staging on s3 +# via staging on S3 pipeline = dlt.pipeline( pipeline_name='chess_pipeline', destination='databricks', @@ -168,7 +168,7 @@ Refer to the [Azure Blob Storage filesystem documentation](./filesystem.md#azure Example to set up Databricks with Azure as a staging destination: -```python +```py # Create a dlt pipeline that will load # chess player data to the Databricks destination # via staging on Azure Blob Storage @@ -195,4 +195,4 @@ This destination fully supports [dlt state sync](../../general-usage/state#synci - [Load data from Google Analytics to Databricks in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-databricks) - [Load data from Google Sheets to Databricks in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-databricks) - [Load data from Chess.com to Databricks in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-databricks) - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/destination.md b/docs/website/docs/dlt-ecosystem/destinations/destination.md new file mode 100644 index 0000000000..60753d90b5 --- /dev/null +++ b/docs/website/docs/dlt-ecosystem/destinations/destination.md @@ -0,0 +1,200 @@ +--- +title: Custom destination +description: Custom `dlt` destination function for reverse ETL +keywords: [reverse etl, sink, function, decorator, destination, custom destination] +--- + +# Custom destination: Reverse ETL + +The `dlt` destination decorator allows you to receive all data passing through your pipeline in a simple function. This can be extremely useful for reverse ETL, where you are pushing data back to an API. + +You can also use this for sending data to a queue or a simple database destination that is not yet supported by `dlt`, although be aware that you will have to manually handle your own migrations in this case. + +It will also allow you to simply get a path to the files of your normalized data. So, if you need direct access to parquet or jsonl files to copy them somewhere or push them to a database, you can do this here too. 
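
As a minimal sketch of that last use case (all names, the export folder, and the sample data below are illustrative; setting `batch_size=0` switches the function from receiving deserialized items to receiving the path of each normalized file, and the `parquet` format assumes the `pyarrow` package is available):

```py
import os
import shutil

import dlt
from dlt.common.typing import TDataItems
from dlt.common.schema import TTableSchema

EXPORT_DIR = "exported_files"  # illustrative target folder


# batch_size=0 means the function receives the path of each normalized file
# instead of the deserialized items, so the file can be copied or pushed as-is
@dlt.destination(batch_size=0, loader_file_format="parquet")
def copy_files(items: TDataItems, table: TTableSchema) -> None:
    os.makedirs(EXPORT_DIR, exist_ok=True)
    # keep the original file name, prefixed with the table name
    target = os.path.join(EXPORT_DIR, f"{table['name']}_{os.path.basename(items)}")
    shutil.copy(items, target)


pipeline = dlt.pipeline("copy_files_pipeline", destination=copy_files)
pipeline.run([{"id": 1}, {"id": 2}], table_name="items")
```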
+ +## Install `dlt` for reverse ETL + +To install `dlt` without additional dependencies: +```sh +pip install dlt +``` + +## Set up a destination function for your pipeline + +The custom destination decorator differs from other destinations in that you do not need to provide connection credentials, but rather you provide a function which gets called for all items loaded during a pipeline run or load operation. With the `@dlt.destination`, you can convert any function that takes two arguments into a `dlt` destination. + +A very simple dlt pipeline that pushes a list of items into a destination function might look like this: + +```py +import dlt +from dlt.common.typing import TDataItems +from dlt.common.schema import TTableSchema + +@dlt.destination(batch_size=10) +def my_destination(items: TDataItems, table: TTableSchema) -> None: + print(table["name"]) + print(items) + +pipeline = dlt.pipeline("custom_destination_pipeline", destination=my_destination) +pipeline.run([1, 2, 3], table_name="items") +``` + +:::tip +1. You can also remove the typing information (`TDataItems` and `TTableSchema`) from this example. Typing is generally useful to know the shape of the incoming objects, though. +2. There are a few other ways for declaring custom destination functions for your pipeline described below. +::: + +### `@dlt.destination`, custom destination function, and signature + +The full signature of the destination decorator plus its function is the following: + +```py +@dlt.destination( + batch_size=10, + loader_file_format="jsonl", + name="my_custom_destination", + naming_convention="direct", + max_nesting_level=0, + skip_dlt_columns_and_tables=True +) +def my_destination(items: TDataItems, table: TTableSchema) -> None: + ... +``` + +### Decorator arguments +* The `batch_size` parameter on the destination decorator defines how many items per function call are batched together and sent as an array. If you set a batch-size of `0`, instead of passing in actual data items, you will receive one call per load job with the path of the file as the items argument. You can then open and process that file in any way you like. +* The `loader_file_format` parameter on the destination decorator defines in which format files are stored in the load package before being sent to the destination function. This can be `jsonl` or `parquet`. +* The `name` parameter on the destination decorator defines the name of the destination that gets created by the destination decorator. +* The `naming_convention` parameter on the destination decorator defines the name of the destination that gets created by the destination decorator. This controls how table and column names are normalized. The default is `direct`, which will keep all names the same. +* The `max_nesting_level` parameter on the destination decorator defines how deep the normalizer will go to normalize complex fields on your data to create subtables. This overwrites any settings on your `source` and is set to zero to not create any nested tables by default. +* The `skip_dlt_columns_and_tables` parameter on the destination decorator defines whether internal tables and columns will be fed into the custom destination function. This is set to `True` by default. + +:::note +Settings above make sure that shape of the data you receive in the destination function is as close as possible to what you see in the data source. + +* The custom destination sets the `max_nesting_level` to 0 by default, which means no sub-tables will be generated during the normalization phase. 
+* The custom destination also skips all internal tables and columns by default. If you need these, set `skip_dlt_columns_and_tables` to False. +::: + +### Custom destination function +* The `items` parameter on the custom destination function contains the items being sent into the destination function. +* The `table` parameter contains the schema table the current call belongs to, including all table hints and columns. For example, the table name can be accessed with `table["name"]`. +* You can also add config values and secrets to the function arguments, see below! + +## Add configuration, credentials and other secret to the destination function +The destination decorator supports settings and secrets variables. If you, for example, plan to connect to a service that requires an API secret or a login, you can do the following: + +```py +@dlt.destination(batch_size=10, loader_file_format="jsonl", name="my_destination") +def my_destination(items: TDataItems, table: TTableSchema, api_key: dlt.secrets.value) -> None: + ... +``` + +You can then set a config variable in your `.dlt/secrets.toml`: like so: + +```toml +[destination.my_destination] +api_key="" +``` + +Custom destinations follow the same configuration rules as [regular named destinations](../../general-usage/destination.md#configure-a-destination) + +## Use the custom destination in `dlt` pipeline + +There are multiple ways to pass the custom destination function to `dlt` pipeline: +- Directly reference the destination function + + ```py + @dlt.destination(batch_size=10) + def local_destination_func(items: TDataItems, table: TTableSchema) -> None: + ... + + # reference function directly + p = dlt.pipeline("my_pipe", destination=local_destination_func) + ``` + + Like for [regular destinations](../../general-usage/destination.md#pass-explicit-credentials), you are allowed to pass configuration and credentials + explicitly to destination function. + ```py + @dlt.destination(batch_size=10, loader_file_format="jsonl", name="my_destination") + def my_destination(items: TDataItems, table: TTableSchema, api_key: dlt.secrets.value) -> None: + ... + + p = dlt.pipeline("my_pipe", destination=my_destination(api_key=os.getenv("MY_API_KEY"))) + ``` + +- Directly via destination reference. In this case, don't use the decorator for the destination function. + ```py + # file my_destination.py + + from dlt.common.destination import Destination + + # don't use the decorator + def local_destination_func(items: TDataItems, table: TTableSchema) -> None: + ... + + # via destination reference + p = dlt.pipeline( + "my_pipe", + destination=Destination.from_reference( + "destination", destination_callable=local_destination_func + ) + ) + ``` +- Via a fully qualified string to function location (can be used from `config.toml` or ENV vars). The destination function should be located in another file. 
  ```py
+  # file my_pipeline.py
+
+  from dlt.common.destination import Destination
+
+  # fully qualified string to function location
+  p = dlt.pipeline(
+      "my_pipe",
+      destination=Destination.from_reference(
+          "destination", destination_callable="my_destination.local_destination_func"
+      )
+  )
+  ```
+
+## Adjust batch size and retry policy for atomic loads
+The destination keeps a local record of how many `DataItems` were processed. If you, for example, use the custom destination to push `DataItems` to a remote API, and this API becomes unavailable during the load, resulting in a failed `dlt` pipeline run, you can repeat the run of your pipeline at a later moment and the custom destination will **restart from the whole batch that failed**. No data is lost, but you can still get duplicated data if you committed half of the batch, e.g., to a database, and then failed.
+**Keeping the batch atomic is up to you.** For this reason, it makes sense to choose a batch size that you can process in one transaction (say, one API request or one database transaction), so that if this request or transaction fails repeatedly, you can repeat it on the next run without pushing duplicate data to your remote location. For systems that are not transactional and do not tolerate duplicated data, you can use a batch size of 1.
+
+Destination functions that raise exceptions are retried 5 times before giving up (`load.raise_on_max_retries` config option). If you run the pipeline again, it will resume loading before extracting new data.
+
+If your exception derives from `DestinationTerminalException`, the whole load job will be marked as failed and not retried again.
+
+:::caution
+If you wipe out the pipeline folder (where job files and destination state are saved), you will not be able to restart from the last failed batch.
+However, it is fairly easy to back up and restore the pipeline directory, [see details below](#manage-pipeline-state-for-incremental-loading).
+:::
+
+## Increase or decrease loading parallelism
+By default, calls to the destination function are executed on multiple threads, so you need to make sure you are not using any non-thread-safe nonlocal or global variables from outside your destination function. If you need all calls to be executed from the same thread, you can set the `workers` [config variable of the load step](../../reference/performance.md#load) to 1.
+
+:::tip
+For performance reasons, we recommend keeping the multithreaded approach and making sure that you, for example, are using thread-safe connection pools to a remote database or queue.
+:::
+
+## Write disposition
+
+`@dlt.destination` will forward all normalized `DataItems` encountered during a pipeline run to the custom destination function, so there is no notion of "write dispositions".
+
+## Staging support
+
+`@dlt.destination` does not currently support staging files in remote locations before the destination function is called. If you need this feature, please let us know.
+
+## Manage pipeline state for incremental loading
+Custom destinations do not have a general mechanism to restore pipeline state. This impacts data sources that rely on the state being kept, i.e., all incremental resources.
+If you wipe the pipeline directory (e.g., by deleting the folder or by running on AWS Lambda / GitHub Actions, where you get a clean runner), the progress of the incremental loading is lost. On the next run, you will re-acquire the data from the beginning.
+ +While we are working on a pluggable state storage you can fix the problem above by: +1. Not wiping the pipeline directory. For example if you run your pipeline on an EC instance periodically, the state will be preserved. +2. By doing a restore/backup of the pipeline directory before/after it runs. This is way easier than it sounds and [here's a script you can reuse](https://gist.github.com/rudolfix/ee6e16d8671f26ac4b9ffc915ad24b6e). + +## What's next + +* Check out our [Custom BigQuery Destination](../../examples/custom_destination_bigquery/) example. +* Need help with building a custom destination? Ask your questions in our [Slack Community](https://dlthub.com/community) technical help channel. diff --git a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md index db7428dcc9..63b4aecd80 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md +++ b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md @@ -7,38 +7,38 @@ keywords: [duckdb, destination, data warehouse] # DuckDB ## Install dlt with DuckDB -**To install the DLT library with DuckDB dependencies:** -``` +**To install the DLT library with DuckDB dependencies, run:** +```sh pip install dlt[duckdb] ``` ## Setup Guide -**1. Initialize a project with a pipeline that loads to DuckDB by running** -``` +**1. Initialize a project with a pipeline that loads to DuckDB by running:** +```sh dlt init chess duckdb ``` -**2. Install the necessary dependencies for DuckDB by running** -``` +**2. Install the necessary dependencies for DuckDB by running:** +```sh pip install -r requirements.txt ``` -**3. Run the pipeline** -``` +**3. Run the pipeline:** +```sh python3 chess_pipeline.py ``` ## Write disposition -All write dispositions are supported +All write dispositions are supported. ## Data loading -`dlt` will load data using large INSERT VALUES statements by default. Loading is multithreaded (20 threads by default). If you are ok with installing `pyarrow` we suggest to switch to `parquet` as file format. Loading is faster (and also multithreaded). +`dlt` will load data using large INSERT VALUES statements by default. Loading is multithreaded (20 threads by default). If you are okay with installing `pyarrow`, we suggest switching to `parquet` as the file format. Loading is faster (and also multithreaded). ### Names normalization -`dlt` uses standard **snake_case** naming convention to keep identical table and column identifiers across all destinations. If you want to use **duckdb** wide range of characters (ie. emojis) for table and column names, you can switch to **duck_case** naming convention which accepts almost any string as an identifier: +`dlt` uses the standard **snake_case** naming convention to keep identical table and column identifiers across all destinations. 
If you want to use the **duckdb** wide range of characters (i.e., emojis) for table and column names, you can switch to the **duck_case** naming convention, which accepts almost any string as an identifier: * `\n` `\r` and `" are translated to `_` -* multiple `_` are translated to single `_` +* multiple `_` are translated to a single `_` Switch the naming convention using `config.toml`: ```toml @@ -46,34 +46,34 @@ Switch the naming convention using `config.toml`: naming="duck_case" ``` -or via env variable `SCHEMA__NAMING` or directly in code: -```python +or via the env variable `SCHEMA__NAMING` or directly in the code: +```py dlt.config["schema.naming"] = "duck_case" ``` :::caution -**duckdb** identifiers are **case insensitive** but display names preserve case. This may create name clashes if for example you load json with -`{"Column": 1, "column": 2}` will map data to a single column. +**duckdb** identifiers are **case insensitive** but display names preserve case. This may create name clashes if, for example, you load JSON with +`{"Column": 1, "column": 2}` as it will map data to a single column. ::: ## Supported file formats -You can configure the following file formats to load data to duckdb +You can configure the following file formats to load data to duckdb: * [insert-values](../file-formats/insert-format.md) is used by default * [parquet](../file-formats/parquet.md) is supported :::note -`duckdb` cannot COPY many parquet files to a single table from multiple threads. In this situation `dlt` serializes the loads. Still - that may be faster than INSERT +`duckdb` cannot COPY many parquet files to a single table from multiple threads. In this situation, `dlt` serializes the loads. Still, that may be faster than INSERT. ::: -* [jsonl](../file-formats/jsonl.md) **is supported but does not work if JSON fields are optional. the missing keys fail the COPY instead of being interpreted as NULL** +* [jsonl](../file-formats/jsonl.md) **is supported but does not work if JSON fields are optional. The missing keys fail the COPY instead of being interpreted as NULL.** ## Supported column hints -`duckdb` may create unique indexes for all columns with `unique` hints but this behavior **is disabled by default** because it slows the loading down significantly. +`duckdb` may create unique indexes for all columns with `unique` hints, but this behavior **is disabled by default** because it slows the loading down significantly. ## Destination Configuration -By default, a DuckDB database will be created in the current working directory with a name `.duckdb` (`chess.duckdb` in the example above). After loading, it is available in `read/write` mode via `with pipeline.sql_client() as con:` which is a wrapper over `DuckDBPyConnection`. See [duckdb docs](https://duckdb.org/docs/api/python/overview#persistent-storage) for details. +By default, a DuckDB database will be created in the current working directory with a name `.duckdb` (`chess.duckdb` in the example above). After loading, it is available in `read/write` mode via `with pipeline.sql_client() as con:`, which is a wrapper over `DuckDBPyConnection`. See [duckdb docs](https://duckdb.org/docs/api/python/overview#persistent-storage) for details. The `duckdb` credentials do not require any secret values. You are free to pass the configuration explicitly via the `credentials` parameter to `dlt.pipeline` or `pipeline.run` methods. 
For example: -```python +```py # will load data to files/data.db database file p = dlt.pipeline(pipeline_name='chess', destination='duckdb', dataset_name='chess_data', full_refresh=False, credentials="files/data.db") @@ -82,23 +82,23 @@ p = dlt.pipeline(pipeline_name='chess', destination='duckdb', dataset_name='ches ``` The destination accepts a `duckdb` connection instance via `credentials`, so you can also open a database connection yourself and pass it to `dlt` to use. `:memory:` databases are supported. -```python +```py import duckdb db = duckdb.connect() p = dlt.pipeline(pipeline_name='chess', destination='duckdb', dataset_name='chess_data', full_refresh=False, credentials=db) ``` -This destination accepts database connection strings in format used by [duckdb-engine](https://github.com/Mause/duckdb_engine#configuration). +This destination accepts database connection strings in the format used by [duckdb-engine](https://github.com/Mause/duckdb_engine#configuration). -You can configure a DuckDB destination with [secret / config values](../../general-usage/credentials) (e.g. using a `secrets.toml` file) +You can configure a DuckDB destination with [secret / config values](../../general-usage/credentials) (e.g., using a `secrets.toml` file) ```toml -destination.duckdb.credentials=duckdb:///_storage/test_quack.duckdb +destination.duckdb.credentials="duckdb:///_storage/test_quack.duckdb" ``` -**duckdb://** url above creates a **relative** path to `_storage/test_quack.duckdb`. To define **absolute** path you need to specify four slashes ie. `duckdb:////_storage/test_quack.duckdb`. +The **duckdb://** URL above creates a **relative** path to `_storage/test_quack.duckdb`. To define an **absolute** path, you need to specify four slashes, i.e., `duckdb:////_storage/test_quack.duckdb`. A few special connection strings are supported: -* **:pipeline:** creates the database in the working directory of the pipeline with name `quack.duckdb`. -* **:memory:** creates in memory database. This may be useful for testing. +* **:pipeline:** creates the database in the working directory of the pipeline with the name `quack.duckdb`. +* **:memory:** creates an in-memory database. This may be useful for testing. ### Additional configuration @@ -109,10 +109,10 @@ create_indexes=true ``` ### dbt support -This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-duckdb](https://github.com/jwills/dbt-duckdb) which is a community supported package. The `duckdb` database is shared with `dbt`. In rare cases you may see information that binary database format does not match the database format expected by `dbt-duckdb`. You may avoid that by updating the `duckdb` package in your `dlt` project with `pip install -U`. +This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-duckdb](https://github.com/jwills/dbt-duckdb), which is a community-supported package. The `duckdb` database is shared with `dbt`. In rare cases, you may see information that the binary database format does not match the database format expected by `dbt-duckdb`. You can avoid that by updating the `duckdb` package in your `dlt` project with `pip install -U`. ### Syncing of `dlt` state -This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination) +This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination). 
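Putting the configuration options above together, here is a hedged, minimal sketch of loading to an explicit database file and reading the data back through the `sql_client()` wrapper mentioned earlier (the sample row, table name, and query are made up for illustration):

```py
import dlt

pipeline = dlt.pipeline(
    pipeline_name="chess",
    destination="duckdb",
    dataset_name="chess_data",
    credentials="files/data.db",  # relative path to the database file
)
pipeline.run([{"player": "magnus"}], table_name="players")

# query the loaded data via the DuckDBPyConnection wrapper
with pipeline.sql_client() as client:
    with client.execute_query("SELECT player FROM players") as cursor:
        print(cursor.fetchall())
```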
## Additional Setup guides @@ -124,4 +124,4 @@ This destination fully supports [dlt state sync](../../general-usage/state#synci - [Load data from Chess.com to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-duckdb) - [Load data from HubSpot to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-duckdb) - [Load data from GitHub to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-duckdb) - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index ba323b3d7f..dbd54253b3 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -7,7 +7,7 @@ Its primary role is to be used as a staging for other destinations, but you can ## Install dlt with filesystem **To install the DLT library with filesystem dependencies:** -``` +```sh pip install dlt[filesystem] ``` @@ -29,7 +29,7 @@ so pip does not fail on backtracking. ### 1. Initialise the dlt project Let's start by initialising a new dlt project as follows: - ```bash + ```sh dlt init chess filesystem ``` > 💡 This command will initialise your pipeline with chess as the source and the AWS S3 filesystem as the destination. @@ -38,7 +38,7 @@ Let's start by initialising a new dlt project as follows: #### AWS S3 The command above creates sample `secrets.toml` and requirements file for AWS S3 bucket. You can install those dependencies by running: -``` +```sh pip install -r requirements.txt ``` @@ -71,7 +71,7 @@ You need to create a S3 bucket and a user who can access that bucket. `dlt` is n 1. You can create the S3 bucket in the AWS console by clicking on "Create Bucket" in S3 and assigning the appropriate name and permissions to the bucket. 2. Once the bucket is created, you'll have the bucket URL. For example, If the bucket name is `dlt-ci-test-bucket`, then the bucket URL will be: - ``` + ```text s3://dlt-ci-test-bucket ``` diff --git a/docs/website/docs/dlt-ecosystem/destinations/index.md b/docs/website/docs/dlt-ecosystem/destinations/index.md index 5d26c0f138..2c24d14312 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/index.md +++ b/docs/website/docs/dlt-ecosystem/destinations/index.md @@ -5,11 +5,11 @@ keywords: ['destinations'] --- import DocCardList from '@theme/DocCardList'; -Pick one of our high quality destinations and load your data to a local database, warehouse or a data lake. Append, replace or merge your data. Apply performance hints like partitions, clusters or indexes. Load directly or via staging. Each of our destinations goes through few hundred automated tests every day. +Pick one of our high-quality destinations and load your data into a local database, warehouse, or data lake. Append, replace, or merge your data. Apply performance hints like partitions, clusters, or indexes. Load directly or via staging. Each of our destinations undergoes several hundred automated tests every day. -* Destination or feature missing? [Join our Slack community](https://dlthub.com/community) and ask for it -* Need more info? [Join our Slack community](https://dlthub.com/community) and ask in the tech help channel or [Talk to an engineer](https://calendar.app.google/kiLhuMsWKpZUpfho6) +* Is a destination or feature missing? [Join our Slack community](https://dlthub.com/community) and ask for it. 
+* Need more info? [Join our Slack community](https://dlthub.com/community) and ask in the tech help channel or [Talk to an engineer](https://calendar.app.google/kiLhuMsWKpZUpfho6). -Otherwise pick a destination below: +Otherwise, pick a destination below: diff --git a/docs/website/docs/dlt-ecosystem/destinations/motherduck.md b/docs/website/docs/dlt-ecosystem/destinations/motherduck.md index b002286bcf..de11ed5772 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/motherduck.md +++ b/docs/website/docs/dlt-ecosystem/destinations/motherduck.md @@ -5,36 +5,36 @@ keywords: [MotherDuck, duckdb, destination, data warehouse] --- # MotherDuck -> 🧪 MotherDuck is still invitation only and intensively tested. Please see the limitations / problems at the end. +> 🧪 MotherDuck is still invitation-only and is being intensively tested. Please see the limitations/problems at the end. ## Install dlt with MotherDuck **To install the DLT library with MotherDuck dependencies:** -``` +```sh pip install dlt[motherduck] ``` :::tip -Decrease the number of load workers to 3-5 depending on the quality of your internet connection if you see a lot of retries in your logs with various timeout, add the following to your `config.toml`: +If you see a lot of retries in your logs with various timeouts, decrease the number of load workers to 3-5 depending on the quality of your internet connection. Add the following to your `config.toml`: ```toml [load] workers=3 ``` -or export **LOAD__WORKERS=3** env variable. See more in [performance](../../reference/performance.md) +or export the **LOAD__WORKERS=3** env variable. See more in [performance](../../reference/performance.md) ::: ## Setup Guide **1. Initialize a project with a pipeline that loads to MotherDuck by running** -``` +```sh dlt init chess motherduck ``` **2. Install the necessary dependencies for MotherDuck by running** -``` +```sh pip install -r requirements.txt ``` -This will install dlt with **motherduck** extra which contains **duckdb** and **pyarrow** dependencies +This will install dlt with the **motherduck** extra which contains **duckdb** and **pyarrow** dependencies. **3. Add your MotherDuck token to `.dlt/secrets.toml`** ```toml @@ -42,63 +42,61 @@ This will install dlt with **motherduck** extra which contains **duckdb** and ** database = "dlt_data_3" password = "" ``` -Paste your **service token** into password. The `database` field is optional but we recommend to set it. MotherDuck will create this database (in this case `dlt_data_3`) for you. +Paste your **service token** into the password field. The `database` field is optional, but we recommend setting it. MotherDuck will create this database (in this case `dlt_data_3`) for you. -Alternatively you can use the connection string syntax +Alternatively, you can use the connection string syntax. ```toml [destination] motherduck.credentials="md:///dlt_data_3?token=" ``` -**3. Run the pipeline** -``` +**4. Run the pipeline** +```sh python3 chess_pipeline.py ``` ## Write disposition -All write dispositions are supported +All write dispositions are supported. ## Data loading -By default **parquet** files and `COPY` command is used to move files to remote duckdb database. All write dispositions are supported. +By default, Parquet files and the `COPY` command are used to move files to the remote duckdb database. All write dispositions are supported. -**INSERT** format is also supported and will execute a large INSERT queries directly into the remote database. 
This is way slower and may exceed maximum query size - so not advised. +The **INSERT** format is also supported and will execute large INSERT queries directly into the remote database. This method is significantly slower and may exceed the maximum query size, so it is not advised. ## dbt support -This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-duckdb](https://github.com/jwills/dbt-duckdb) which is a community supported package. `dbt` version >= 1.5 is required (which is current `dlt` default.) +This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-duckdb](https://github.com/jwills/dbt-duckdb), which is a community-supported package. `dbt` version >= 1.5 is required (which is the current `dlt` default.) ## Syncing of `dlt` state -This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination) +This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination). ## Automated tests -Each destination must pass few hundred automatic tests. MotherDuck is passing those tests (except the transactions OFC). However we encountered issues with ATTACH timeouts when connecting which makes running such number of tests unstable. Tests on CI are disabled. +Each destination must pass a few hundred automatic tests. MotherDuck is passing these tests (except for the transactions, of course). However, we have encountered issues with ATTACH timeouts when connecting, which makes running such a number of tests unstable. Tests on CI are disabled. ## Troubleshooting / limitations ### I see a lot of errors in the log like DEADLINE_EXCEEDED or Connection timed out -Motherduck is very sensitive to quality of the internet connection and **number of workers used to load data**. Decrease the number of workers and make sure your internet connection really works. We could not find any way to increase those timeouts yet. - +MotherDuck is very sensitive to the quality of the internet connection and the **number of workers used to load data**. Decrease the number of workers and ensure your internet connection is stable. We have not found any way to increase these timeouts yet. ### MotherDuck does not support transactions. -Do not use `begin`, `commit` and `rollback` on `dlt` **sql_client** or on duckdb dbapi connection. It has no effect for DML statements (they are autocommit). It is confusing the query engine for DDL (tables not found etc.). -If your connection if of poor quality and you get a time out when executing DML query it may happen that your transaction got executed, - +Do not use `begin`, `commit`, and `rollback` on `dlt` **sql_client** or on the duckdb dbapi connection. It has no effect on DML statements (they are autocommit). It confuses the query engine for DDL (tables not found, etc.). +If your connection is of poor quality and you get a timeout when executing a DML query, it may happen that your transaction got executed. ### I see some exception with home_dir missing when opening `md:` connection. -Some internal component (HTTPS) requires **HOME** env variable to be present. Export such variable to the command line. Here is what we do in our tests: -```python +Some internal component (HTTPS) requires the **HOME** env variable to be present. Export such a variable to the command line. Here is what we do in our tests: +```py os.environ["HOME"] = "/tmp" ``` -before opening connection +before opening the connection. ### I see some watchdog timeouts. We also see them. 
-``` +```text 'ATTACH_DATABASE': keepalive watchdog timeout ``` -My observation is that if you write a lot of data into the database then close the connection and then open it again to write, there's a chance of such timeout. Possible **WAL** file is being written to the remote duckdb database. +Our observation is that if you write a lot of data into the database, then close the connection and then open it again to write, there's a chance of such a timeout. A possible **WAL** file is being written to the remote duckdb database. ### Invalid Input Error: Initialization function "motherduck_init" from file Use `duckdb 0.8.1` or above. - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/mssql.md b/docs/website/docs/dlt-ecosystem/destinations/mssql.md index 9d216a52a3..fc3eede075 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/mssql.md +++ b/docs/website/docs/dlt-ecosystem/destinations/mssql.md @@ -7,8 +7,8 @@ keywords: [mssql, sqlserver, destination, data warehouse] # Microsoft SQL Server ## Install dlt with MS SQL -**To install the DLT library with MS SQL dependencies:** -``` +**To install the DLT library with MS SQL dependencies, use:** +```sh pip install dlt[mssql] ``` @@ -16,35 +16,35 @@ pip install dlt[mssql] ### Prerequisites -_Microsoft ODBC Driver for SQL Server_ must be installed to use this destination. -This can't be included with `dlt`'s python dependencies, so you must install it separately on your system. You can find the official installation instructions [here](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16). +The _Microsoft ODBC Driver for SQL Server_ must be installed to use this destination. +This cannot be included with `dlt`'s python dependencies, so you must install it separately on your system. You can find the official installation instructions [here](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16). Supported driver versions: * `ODBC Driver 18 for SQL Server` * `ODBC Driver 17 for SQL Server` -You can [configure driver name](#additional-destination-options) explicitly as well. +You can also [configure the driver name](#additional-destination-options) explicitly. ### Create a pipeline -**1. Initalize a project with a pipeline that loads to MS SQL by running** -``` +**1. Initialize a project with a pipeline that loads to MS SQL by running:** +```sh dlt init chess mssql ``` -**2. Install the necessary dependencies for MS SQL by running** -``` +**2. Install the necessary dependencies for MS SQL by running:** +```sh pip install -r requirements.txt ``` or run: -``` +```sh pip install dlt[mssql] ``` -This will install dlt with **mssql** extra which contains all the dependencies required by the SQL server client. +This will install `dlt` with the `mssql` extra, which contains all the dependencies required by the SQL server client. **3. Enter your credentials into `.dlt/secrets.toml`.** -Example, replace with your database connection info: +For example, replace with your database connection info: ```toml [destination.mssql.credentials] database = "dlt_data" @@ -61,34 +61,34 @@ You can also pass a SQLAlchemy-like database connection: destination.mssql.credentials="mssql://loader:@loader.database.windows.net/dlt_data?connect_timeout=15" ``` -To pass credentials directly you can use `credentials` argument passed to `dlt.pipeline` or `pipeline.run` methods. 
-```python +To pass credentials directly, you can use the `credentials` argument passed to `dlt.pipeline` or `pipeline.run` methods. +```py pipeline = dlt.pipeline(pipeline_name='chess', destination='postgres', dataset_name='chess_data', credentials="mssql://loader:@loader.database.windows.net/dlt_data?connect_timeout=15") ``` ## Write disposition -All write dispositions are supported +All write dispositions are supported. -If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized` the destination tables will be dropped and +If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized`, the destination tables will be dropped and recreated with an `ALTER SCHEMA ... TRANSFER`. The operation is atomic: mssql supports DDL transactions. ## Data loading -Data is loaded via INSERT statements by default. MSSQL has a limit of 1000 rows per INSERT and this is what we use. +Data is loaded via INSERT statements by default. MSSQL has a limit of 1000 rows per INSERT, and this is what we use. ## Supported file formats * [insert-values](../file-formats/insert-format.md) is used by default ## Supported column hints -**mssql** will create unique indexes for all columns with `unique` hints. This behavior **may be disabled** +**mssql** will create unique indexes for all columns with `unique` hints. This behavior **may be disabled**. ## Syncing of `dlt` state -This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination) +This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination). ## Data types -MS SQL does not support JSON columns, so JSON objects are stored as strings in `nvarchar` column. +MS SQL does not support JSON columns, so JSON objects are stored as strings in `nvarchar` columns. ## Additional destination options -**mssql** destination **does not** creates UNIQUE indexes by default on columns with `unique` hint (ie. `_dlt_id`). To enable this behavior +The **mssql** destination **does not** create UNIQUE indexes by default on columns with the `unique` hint (i.e., `_dlt_id`). To enable this behavior: ```toml [destination.mssql] create_indexes=true @@ -108,7 +108,7 @@ destination.mssql.credentials="mssql://loader:@loader.database.windows ``` ### dbt support -No dbt support yet +No dbt support yet. ## Additional Setup guides @@ -120,4 +120,4 @@ No dbt support yet - [Load data from GitHub to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-mssql) - [Load data from Notion to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-mssql) - [Load data from HubSpot to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-mssql) - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/postgres.md b/docs/website/docs/dlt-ecosystem/destinations/postgres.md index cd0ea08929..ddf4aae9f8 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/postgres.md +++ b/docs/website/docs/dlt-ecosystem/destinations/postgres.md @@ -7,47 +7,47 @@ keywords: [postgres, destination, data warehouse] # Postgres ## Install dlt with PostgreSQL -**To install the DLT library with PostgreSQL dependencies:** -``` +**To install the DLT library with PostgreSQL dependencies, run:** +```sh pip install dlt[postgres] ``` ## Setup Guide -**1. 
Initialize a project with a pipeline that loads to Postgres by running** -``` +**1. Initialize a project with a pipeline that loads to Postgres by running:** +```sh dlt init chess postgres ``` -**2. Install the necessary dependencies for Postgres by running** -``` +**2. Install the necessary dependencies for Postgres by running:** +```sh pip install -r requirements.txt ``` -This will install dlt with **postgres** extra which contains `psycopg2` client. +This will install dlt with the `postgres` extra, which contains the `psycopg2` client. -**3. Create a new database after setting up a Postgres instance and `psql` / query editor by running** -``` +**3. After setting up a Postgres instance and `psql` / query editor, create a new database by running:** +```sql CREATE DATABASE dlt_data; ``` -Add `dlt_data` database to `.dlt/secrets.toml`. +Add the `dlt_data` database to `.dlt/secrets.toml`. -**4. Create a new user by running** -``` +**4. Create a new user by running:** +```sql CREATE USER loader WITH PASSWORD ''; ``` -Add `loader` user and `` password to `.dlt/secrets.toml`. +Add the `loader` user and `` password to `.dlt/secrets.toml`. -**5. Give the `loader` user owner permissions by running** -``` +**5. Give the `loader` user owner permissions by running:** +```sql ALTER DATABASE dlt_data OWNER TO loader; ``` -It is possible to set more restrictive permissions (e.g. give user access to a specific schema). +You can set more restrictive permissions (e.g., give user access to a specific schema). **6. Enter your credentials into `.dlt/secrets.toml`.** -It should now look like +It should now look like this: ```toml [destination.postgres.credentials] @@ -59,33 +59,33 @@ port = 5432 connect_timeout = 15 ``` -You can also pass a database connection string similar to the one used by `psycopg2` library or [SQLAlchemy](https://docs.sqlalchemy.org/en/20/core/engines.html#postgresql). Credentials above will look like this: +You can also pass a database connection string similar to the one used by the `psycopg2` library or [SQLAlchemy](https://docs.sqlalchemy.org/en/20/core/engines.html#postgresql). The credentials above will look like this: ```toml # keep it at the top of your toml file! before any section starts destination.postgres.credentials="postgresql://loader:@localhost/dlt_data?connect_timeout=15" ``` -To pass credentials directly you can use `credentials` argument passed to `dlt.pipeline` or `pipeline.run` methods. -```python +To pass credentials directly, you can use the `credentials` argument passed to the `dlt.pipeline` or `pipeline.run` methods. +```py pipeline = dlt.pipeline(pipeline_name='chess', destination='postgres', dataset_name='chess_data', credentials="postgresql://loader:@localhost/dlt_data") ``` ## Write disposition -All write dispositions are supported +All write dispositions are supported. -If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized` the destination tables will be dropped and replaced by the staging tables. +If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized`, the destination tables will be dropped and replaced by the staging tables. ## Data loading `dlt` will load data using large INSERT VALUES statements by default. Loading is multithreaded (20 threads by default). ## Supported file formats -* [insert-values](../file-formats/insert-format.md) is used by default +* [insert-values](../file-formats/insert-format.md) is used by default. 
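Before moving on to column hints, here is a short, hedged sketch that ties the sections above together: passing credentials directly and choosing a write disposition on `run()` (the table name and rows are hypothetical):

```py
import dlt

pipeline = dlt.pipeline(
    pipeline_name="chess",
    destination="postgres",
    dataset_name="chess_data",
    credentials="postgresql://loader:<password>@localhost/dlt_data",
)
# "append" is the default; "replace" and "merge" are also supported
pipeline.run(
    [{"id": 1, "username": "magnus"}],
    table_name="players",
    write_disposition="replace",
)
```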
## Supported column hints -`postgres` will create unique indexes for all columns with `unique` hints. This behavior **may be disabled** +`postgres` will create unique indexes for all columns with `unique` hints. This behavior **may be disabled**. ## Additional destination options -Postgres destination creates UNIQUE indexes by default on columns with `unique` hint (ie. `_dlt_id`). To disable this behavior +The Postgres destination creates UNIQUE indexes by default on columns with the `unique` hint (i.e., `_dlt_id`). To disable this behavior: ```toml [destination.postgres] create_indexes=false @@ -95,16 +95,16 @@ create_indexes=false This destination [integrates with dbt](../transformations/dbt/dbt.md) via dbt-postgres. ### Syncing of `dlt` state -This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination) +This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination). -## Additional Setup guides - -- [Load data from HubSpot to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-postgres) -- [Load data from GitHub to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-postgres) -- [Load data from Chess.com to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-postgres) -- [Load data from Notion to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-postgres) -- [Load data from Google Analytics to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-postgres) -- [Load data from Google Sheets to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-postgres) -- [Load data from Stripe to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-postgres) - \ No newline at end of file +## Additional Setup Guides + +- [Load data from HubSpot to PostgreSQL in Python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-postgres) +- [Load data from GitHub to PostgreSQL in Python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-postgres) +- [Load data from Chess.com to PostgreSQL in Python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-postgres) +- [Load data from Notion to PostgreSQL in Python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-postgres) +- [Load data from Google Analytics to PostgreSQL in Python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-postgres) +- [Load data from Google Sheets to PostgreSQL in Python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-postgres) +- [Load data from Stripe to PostgreSQL in Python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-postgres) + diff --git a/docs/website/docs/dlt-ecosystem/destinations/qdrant.md b/docs/website/docs/dlt-ecosystem/destinations/qdrant.md index 04b5cac19b..40d85a43a5 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/qdrant.md +++ 
b/docs/website/docs/dlt-ecosystem/destinations/qdrant.md @@ -13,7 +13,7 @@ This destination helps you load data into Qdrant from [dlt resources](../../gene 1. To use Qdrant as a destination, make sure `dlt` is installed with the `qdrant` extra: -```bash +```sh pip install dlt[qdrant] ``` @@ -31,7 +31,7 @@ If no configuration options are provided, the default fallback will be `http://l 3. Define the source of the data. For starters, let's load some data from a simple data structure: -```python +```py import dlt from dlt.destinations.adapters import qdrant_adapter @@ -53,7 +53,7 @@ movies = [ 4. Define the pipeline: -```python +```py pipeline = dlt.pipeline( pipeline_name="movies", destination="qdrant", @@ -63,7 +63,7 @@ pipeline = dlt.pipeline( 5. Run the pipeline: -```python +```py info = pipeline.run( qdrant_adapter( movies, @@ -74,7 +74,7 @@ info = pipeline.run( 6. Check the results: -```python +```py print(info) ``` @@ -86,20 +86,20 @@ To use vector search after the data has been loaded, you must specify which fiel The `qdrant_adapter` is a helper function that configures the resource for the Qdrant destination: -```python +```py qdrant_adapter(data, embed) ``` It accepts the following arguments: -- `data`: a dlt resource object or a Python data structure (e.g. a list of dictionaries). +- `data`: a dlt resource object or a Python data structure (e.g., a list of dictionaries). - `embed`: a name of the field or a list of names to generate embeddings for. Returns: [DLT resource](../../general-usage/resource.md) object that you can pass to the `pipeline.run()`. Example: -```python +```py qdrant_adapter( resource, embed=["title", "description"], @@ -122,7 +122,7 @@ The [replace](../../general-usage/full-loading.md) disposition replaces the data In the movie example from the [setup guide](#setup-guide), we can use the `replace` disposition to reload the data every time we run the pipeline: -```python +```py info = pipeline.run( qdrant_adapter( movies, @@ -135,9 +135,9 @@ info = pipeline.run( ### Merge The [merge](../../general-usage/incremental-loading.md) write disposition merges the data from the resource with the data at the destination. -For `merge` disposition, you would need to specify a `primary_key` for the resource: +For the `merge` disposition, you need to specify a `primary_key` for the resource: -```python +```py info = pipeline.run( qdrant_adapter( movies, @@ -166,11 +166,11 @@ Qdrant uses collections to categorize and identify data. To avoid potential nami For example, if you have a dataset named `movies_dataset` and a table named `actors`, the Qdrant collection name would be `movies_dataset_actors` (the default separator is an underscore). -However, if you prefer to have class names without the dataset prefix, skip `dataset_name` argument. +However, if you prefer to have class names without the dataset prefix, skip the `dataset_name` argument. For example: -```python +```py pipeline = dlt.pipeline( pipeline_name="movies", destination="qdrant", @@ -185,7 +185,7 @@ pipeline = dlt.pipeline( - `upload_batch_size`: (int) The batch size for data uploads. The default value is 64. -- `upload_parallelism`: (int) The maximal number of concurrent threads to run data uploads. The default value is 1. +- `upload_parallelism`: (int) The maximum number of concurrent threads to run data uploads. The default value is 1. - `upload_max_retries`: (int) The number of retries to upload data in case of failure. The default value is 3. 
@@ -222,4 +222,4 @@ You can find the setup instructions to run Qdrant [here](https://qdrant.tech/doc Qdrant destination supports syncing of the `dlt` state. - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/redshift.md b/docs/website/docs/dlt-ecosystem/destinations/redshift.md index cb220a31fc..7b56377f3b 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/redshift.md +++ b/docs/website/docs/dlt-ecosystem/destinations/redshift.md @@ -8,7 +8,7 @@ keywords: [redshift, destination, data warehouse] ## Install dlt with Redshift **To install the DLT library with Redshift dependencies:** -``` +```sh pip install dlt[redshift] ``` @@ -17,19 +17,19 @@ pip install dlt[redshift] Let's start by initializing a new dlt project as follows: -```bash +```sh dlt init chess redshift ``` > 💡 This command will initialize your pipeline with chess as the source and Redshift as the destination. The above command generates several files and directories, including `.dlt/secrets.toml` and a requirements file for Redshift. You can install the necessary dependencies specified in the requirements file by executing it as follows: -```bash +```sh pip install -r requirements.txt ``` or with `pip install dlt[redshift]`, which installs the `dlt` library and the necessary dependencies for working with Amazon Redshift as a destination. ### 2. Setup Redshift cluster -To load data into Redshift, it is necessary to create a Redshift cluster and enable access to your IP address through the VPC inbound rules associated with the cluster. While we recommend asking our GPT-4 assistant for details, we have provided a general outline of the process below: +To load data into Redshift, you need to create a Redshift cluster and enable access to your IP address through the VPC inbound rules associated with the cluster. While we recommend asking our GPT-4 assistant for details, we have provided a general outline of the process below: 1. You can use an existing cluster or create a new one. 2. To create a new cluster, navigate to the 'Provisioned Cluster Dashboard' and click 'Create Cluster'. @@ -52,16 +52,16 @@ To load data into Redshift, it is necessary to create a Redshift cluster and ena 2. The "host" is derived from the cluster endpoint specified in the “General Configuration.” For example: - ```bash + ```sh # If the endpoint is: redshift-cluster-1.cv3cmsy7t4il.us-east-1.redshift.amazonaws.com:5439/your_database_name # Then the host is: redshift-cluster-1.cv3cmsy7t4il.us-east-1.redshift.amazonaws.com ``` -3. The `connect_timeout` is the number of minutes the pipeline will wait before the timeout. +3. The `connect_timeout` is the number of minutes the pipeline will wait before timing out. -You can also pass a database connection string similar to the one used by `psycopg2` library or [SQLAlchemy](https://docs.sqlalchemy.org/en/20/core/engines.html#postgresql). Credentials above will look like this: +You can also pass a database connection string similar to the one used by the `psycopg2` library or [SQLAlchemy](https://docs.sqlalchemy.org/en/20/core/engines.html#postgresql). The credentials above will look like this: ```toml # keep it at the top of your toml file! before any section starts destination.redshift.credentials="redshift://loader:@localhost/dlt_data?connect_timeout=15" @@ -82,25 +82,24 @@ When staging is enabled: > ❗ **Redshift cannot load `TIME` columns from `json` or `parquet` files**. `dlt` will fail such jobs permanently. Switch to direct `insert_values` to load time columns. 
-> ❗ **Redshift cannot detect compression type from `json` files**. `dlt` assumes that `jsonl` files are gzip compressed which is the default. - -> ❗ **Redshift loads `complex` types as strings into SUPER with `parquet`**. Use `jsonl` format to store JSON in SUPER natively or transform your SUPER columns with `PARSE_JSON``. +> ❗ **Redshift cannot detect compression type from `json` files**. `dlt` assumes that `jsonl` files are gzip compressed, which is the default. +> ❗ **Redshift loads `complex` types as strings into SUPER with `parquet`**. Use `jsonl` format to store JSON in SUPER natively or transform your SUPER columns with `PARSE_JSON`. ## Supported column hints Amazon Redshift supports the following column hints: -- `cluster` - hint is a Redshift term for table distribution. Applying it to a column makes it the "DISTKEY," affecting query and join performance. Check the following [documentation](https://docs.aws.amazon.com/redshift/latest/dg/c_best-practices-best-dist-key.html) for more info. -- `sort` - creates SORTKEY to order rows on disk physically. It is used to improve a query and join speed in Redshift, please read the [sort key docs](https://docs.aws.amazon.com/redshift/latest/dg/c_best-practices-sort-key.html) to learn more. +- `cluster` - This hint is a Redshift term for table distribution. Applying it to a column makes it the "DISTKEY," affecting query and join performance. Check the following [documentation](https://docs.aws.amazon.com/redshift/latest/dg/c_best-practices-best-dist-key.html) for more info. +- `sort` - This hint creates a SORTKEY to order rows on disk physically. It is used to improve query and join speed in Redshift. Please read the [sort key docs](https://docs.aws.amazon.com/redshift/latest/dg/c_best-practices-sort-key.html) to learn more. ## Staging support -Redshift supports s3 as a file staging destination. dlt will upload files in the parquet format to s3 and ask redshift to copy their data directly into the db. Please refere to the [S3 documentation](./filesystem.md#aws-s3) to learn how to set up your s3 bucket with the bucket_url and credentials. The `dlt` Redshift loader will use the aws credentials provided for s3 to access the s3 bucket if not specified otherwise (see config options below). Alternatively to parquet files, you can also specify jsonl as the staging file format. For this set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`. +Redshift supports s3 as a file staging destination. dlt will upload files in the parquet format to s3 and ask Redshift to copy their data directly into the db. Please refer to the [S3 documentation](./filesystem.md#aws-s3) to learn how to set up your s3 bucket with the bucket_url and credentials. The `dlt` Redshift loader will use the AWS credentials provided for s3 to access the s3 bucket if not specified otherwise (see config options below). Alternatively to parquet files, you can also specify jsonl as the staging file format. For this, set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`. 
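For instance, a hedged sketch of a staging-enabled pipeline that switches the staged files to jsonl (the S3 bucket and Redshift credentials are assumed to be configured in `.dlt/secrets.toml`, and the sample data is made up):

```py
import dlt

pipeline = dlt.pipeline(
    pipeline_name="chess_pipeline",
    destination="redshift",
    staging="filesystem",  # the s3 bucket_url comes from the filesystem configuration
    dataset_name="player_data",
)
# stage jsonl files instead of the default parquet
info = pipeline.run([{"id": 1}], table_name="players", loader_file_format="jsonl")
print(info)
```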
-### Authentication iam Role +### Authentication IAM Role -If you would like to load from s3 without forwarding the aws staging credentials but authorize with an iam role connected to Redshift, follow the [Redshift documentation](https://docs.aws.amazon.com/redshift/latest/mgmt/authorizing-redshift-service.html) to create a role with access to s3 linked to your redshift cluster and change your destination settings to use the iam role: +If you would like to load from s3 without forwarding the AWS staging credentials but authorize with an IAM role connected to Redshift, follow the [Redshift documentation](https://docs.aws.amazon.com/redshift/latest/mgmt/authorizing-redshift-service.html) to create a role with access to s3 linked to your Redshift cluster and change your destination settings to use the IAM role: ```toml [destination] @@ -109,7 +108,7 @@ staging_iam_role="arn:aws:iam::..." ### Redshift/S3 staging example code -```python +```py # Create a dlt pipeline that will load # chess player data to the redshift destination # via staging on s3 @@ -143,4 +142,4 @@ Supported loader file formats for Redshift are `sql` and `insert_values` (defaul - [Load data from GitHub to Redshift in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-redshift) - [Load data from Stripe to Redshift in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-redshift) - [Load data from Google Sheets to Redshift in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-redshift) - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md index 34efb0df39..a65eaec267 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md +++ b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md @@ -7,30 +7,30 @@ keywords: [Snowflake, destination, data warehouse] # Snowflake ## Install dlt with Snowflake -**To install the DLT library with Snowflake dependencies:** -``` +**To install the DLT library with Snowflake dependencies, run:** +```sh pip install dlt[snowflake] ``` ## Setup Guide -**1. Initialize a project with a pipeline that loads to snowflake by running** -``` +**1. Initialize a project with a pipeline that loads to Snowflake by running:** +```sh dlt init chess snowflake ``` -**2. Install the necessary dependencies for snowflake by running** -``` +**2. Install the necessary dependencies for Snowflake by running:** +```sh pip install -r requirements.txt ``` -This will install dlt with **snowflake** extra which contains Snowflake Python dbapi client. +This will install `dlt` with the `snowflake` extra, which contains the Snowflake Python dbapi client. -**3. Create a new database, user and give dlt access** +**3. Create a new database, user, and give dlt access.** Read the next chapter below. **4. Enter your credentials into `.dlt/secrets.toml`.** -It should now look like +It should now look like this: ```toml [destination.snowflake.credentials] database = "dlt_data" @@ -40,14 +40,13 @@ host = "kgiotue-wn98412" warehouse = "COMPUTE_WH" role = "DLT_LOADER_ROLE" ``` -In case of snowflake **host** is your [Account Identifier](https://docs.snowflake.com/en/user-guide/admin-account-identifier). 
You can get in **Admin**/**Accounts** by copying account url: -https://kgiotue-wn98412.snowflakecomputing.com and extracting the host name (**kgiotue-wn98412**) +In the case of Snowflake, the **host** is your [Account Identifier](https://docs.snowflake.com/en/user-guide/admin-account-identifier). You can get it in **Admin**/**Accounts** by copying the account URL: https://kgiotue-wn98412.snowflakecomputing.com and extracting the host name (**kgiotue-wn98412**). -The **warehouse** and **role** are optional if you assign defaults to your user. In the example below we do not do that, so we set them explicitly. +The **warehouse** and **role** are optional if you assign defaults to your user. In the example below, we do not do that, so we set them explicitly. ### Setup the database user and permissions -Instructions below assume that you use the default account setup that you get after creating Snowflake account. You should have default warehouse named **COMPUTE_WH** and snowflake account. Below we create a new database, user and assign permissions. The permissions are very generous. A more experienced user can easily reduce `dlt` permissions to just one schema in the database. +The instructions below assume that you use the default account setup that you get after creating a Snowflake account. You should have a default warehouse named **COMPUTE_WH** and a Snowflake account. Below, we create a new database, user, and assign permissions. The permissions are very generous. A more experienced user can easily reduce `dlt` permissions to just one schema in the database. ```sql --create database with standard settings CREATE DATABASE dlt_data; @@ -67,17 +66,17 @@ GRANT ALL PRIVILEGES ON FUTURE SCHEMAS IN DATABASE dlt_data TO DLT_LOADER_ROLE; GRANT ALL PRIVILEGES ON FUTURE TABLES IN DATABASE dlt_data TO DLT_LOADER_ROLE; ``` -Now you can use the user named `LOADER` to access database `DLT_DATA` and log in with specified password. +Now you can use the user named `LOADER` to access the database `DLT_DATA` and log in with the specified password. You can also decrease the suspend time for your warehouse to 1 minute (**Admin**/**Warehouses** in Snowflake UI) ### Authentication types -Snowflake destination accepts three authentication types +Snowflake destination accepts three authentication types: - password authentication - [key pair authentication](https://docs.snowflake.com/en/user-guide/key-pair-auth) - external authentication -The **password authentication** is not any different from other databases like Postgres or Redshift. `dlt` follows the same syntax as [SQLAlchemy dialect](https://docs.snowflake.com/en/developer-guide/python-connector/sqlalchemy#required-parameters). +The **password authentication** is not any different from other databases like Postgres or Redshift. `dlt` follows the same syntax as the [SQLAlchemy dialect](https://docs.snowflake.com/en/developer-guide/python-connector/sqlalchemy#required-parameters). You can also pass credentials as a database connection string. For example: ```toml @@ -85,7 +84,7 @@ You can also pass credentials as a database connection string. 
For example: destination.snowflake.credentials="snowflake://loader:@kgiotue-wn98412/dlt_data?warehouse=COMPUTE_WH&role=DLT_LOADER_ROLE" ``` -In **key pair authentication** you replace password with a private key string that should be in Base64-encoded DER format ([DBT also recommends](https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup#key-pair-authentication) base64-encoded private keys for Snowflake connections). The private key may also be encrypted. In that case you must provide a passphrase alongside with the private key. +In **key pair authentication**, you replace the password with a private key string that should be in Base64-encoded DER format ([DBT also recommends](https://docs.getdbt.com/docs/core/connect-data-platform/snowflake-setup#key-pair-authentication) base64-encoded private keys for Snowflake connections). The private key may also be encrypted. In that case, you must provide a passphrase alongside the private key. ```toml [destination.snowflake.credentials] database = "dlt_data" @@ -96,13 +95,13 @@ private_key_passphrase="passphrase" ``` > You can easily get the base64-encoded value of your private key by running `base64 -i .pem` in your terminal -If you pass a passphrase in the connection string, please url encode it. +If you pass a passphrase in the connection string, please URL encode it. ```toml # keep it at the top of your toml file! before any section starts destination.snowflake.credentials="snowflake://loader:@kgiotue-wn98412/dlt_data?private_key=&private_key_passphrase=" ``` -In **external authentication** you can use oauth provider like Okta or external browser to authenticate. You pass your authenticator and refresh token as below: +In **external authentication**, you can use an OAuth provider like Okta or an external browser to authenticate. You pass your authenticator and refresh token as below: ```toml [destination.snowflake.credentials] database = "dlt_data" @@ -110,17 +109,17 @@ username = "loader" authenticator="..." token="..." ``` -or in connection string as query parameters. +or in the connection string as query parameters. Refer to Snowflake [OAuth](https://docs.snowflake.com/en/user-guide/oauth-intro) for more details. ## Write disposition -All write dispositions are supported +All write dispositions are supported. -If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized` the destination tables will be dropped and +If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized`, the destination tables will be dropped and recreated with a [clone command](https://docs.snowflake.com/en/sql-reference/sql/create-clone) from the staging tables. ## Data loading -The data is loaded using internal Snowflake stage. We use `PUT` command and per-table built-in stages by default. Stage files are immediately removed (if not specified otherwise). +The data is loaded using an internal Snowflake stage. We use the `PUT` command and per-table built-in stages by default. Stage files are immediately removed (if not specified otherwise). ## Supported file formats * [insert-values](../file-formats/insert-format.md) is used by default @@ -131,47 +130,47 @@ When staging is enabled: * [jsonl](../file-formats/jsonl.md) is used by default * [parquet](../file-formats/parquet.md) is supported -> ❗ When loading from `parquet`, Snowflake will store `complex` types (JSON) in `VARIANT` as string. Use `jsonl` format instead or use `PARSE_JSON` to update the `VARIANT`` field after loading. 
+> ❗ When loading from `parquet`, Snowflake will store `complex` types (JSON) in `VARIANT` as a string. Use the `jsonl` format instead or use `PARSE_JSON` to update the `VARIANT` field after loading. ## Supported column hints Snowflake supports the following [column hints](https://dlthub.com/docs/general-usage/schema#tables-and-columns): -* `cluster` - creates a cluster column(s). Many column per table are supported and only when a new table is created. +* `cluster` - creates a cluster column(s). Many columns per table are supported and only when a new table is created. ### Table and column identifiers -Snowflake makes all unquoted identifiers uppercase and then resolves them case-insensitive in SQL statements. `dlt` (effectively) does not quote identifies in DDL preserving default behavior. +Snowflake makes all unquoted identifiers uppercase and then resolves them case-insensitively in SQL statements. `dlt` (effectively) does not quote identifiers in DDL, preserving default behavior. -Names of tables and columns in [schemas](../../general-usage/schema.md) are kept in lower case like for all other destinations. This is the pattern we observed in other tools ie. `dbt`. In case of `dlt` it is however trivial to define your own uppercase [naming convention](../../general-usage/schema.md#naming-convention) +Names of tables and columns in [schemas](../../general-usage/schema.md) are kept in lower case like for all other destinations. This is the pattern we observed in other tools, i.e., `dbt`. In the case of `dlt`, it is, however, trivial to define your own uppercase [naming convention](../../general-usage/schema.md#naming-convention) ## Staging support -Snowflake supports s3 and gcs as a file staging destinations. dlt will upload files in the parquet format to the bucket provider and will ask snowflake to copy their data directly into the db. +Snowflake supports S3 and GCS as file staging destinations. dlt will upload files in the parquet format to the bucket provider and will ask Snowflake to copy their data directly into the db. -Alternavitely to parquet files, you can also specify jsonl as the staging file format. For this set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`. +Alternatively to parquet files, you can also specify jsonl as the staging file format. For this, set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`. ### Snowflake and Amazon S3 -Please refer to the [S3 documentation](./filesystem.md#aws-s3) to learn how to set up your bucket with the bucket_url and credentials. For s3 The dlt Redshift loader will use the aws credentials provided for s3 to access the s3 bucket if not specified otherwise (see config options below). Alternatively you can create a stage for your S3 Bucket by following the instructions provided in the [Snowflake S3 documentation](https://docs.snowflake.com/en/user-guide/data-load-s3-config-storage-integration). +Please refer to the [S3 documentation](./filesystem.md#aws-s3) to learn how to set up your bucket with the bucket_url and credentials. For S3, the dlt Redshift loader will use the AWS credentials provided for S3 to access the S3 bucket if not specified otherwise (see config options below). Alternatively, you can create a stage for your S3 Bucket by following the instructions provided in the [Snowflake S3 documentation](https://docs.snowflake.com/en/user-guide/data-load-s3-config-storage-integration). 
The basic steps are as follows: -* Create a storage integration linked to GCS and the right bucket +* Create a storage integration linked to S3 and the right bucket -* Grant access to this storage integration to the snowflake role you are using to load the data into snowflake. +* Grant access to this storage integration to the Snowflake role you are using to load the data into Snowflake. * Create a stage from this storage integration in the PUBLIC namespace, or the namespace of the schema of your data. -* Also grant access to this stage for the role you are using to load data into snowflake. +* Also grant access to this stage for the role you are using to load data into Snowflake.
* Provide the name of your stage (including the namespace) to dlt like so: ```toml [destination] -stage_name=PUBLIC.my_gcs_stage +stage_name="PUBLIC.my_gcs_stage" ``` -To run Snowflake with gcs as staging destination: +To run Snowflake with GCS as the staging destination: -```python +```py # Create a dlt pipeline that will load -# chess player data to the snowflake destination -# via staging on gcs +# chess player data to the Snowflake destination +# via staging on GCS pipeline = dlt.pipeline( pipeline_name='chess_pipeline', destination='snowflake', @@ -211,27 +210,27 @@ pipeline = dlt.pipeline( ### Snowflake and Azure Blob Storage -Please refer to the [Azure Blob Storage filesystem documentation](./filesystem.md#azure-blob-storage) to learn how to set up your bucket with the bucket_url and credentials. For azure the Snowflake loader will use -the filesystem credentials for your azure blob storage container if not specified otherwise (see config options below). Alternatively you can define an external stage in Snowflake and provide the stage identifier. -Please consult the snowflake Documentation on [how to create a stage for your Azure Blob Storage Container](https://docs.snowflake.com/en/user-guide/data-load-azure). The basic steps are as follows: +Please refer to the [Azure Blob Storage filesystem documentation](./filesystem.md#azure-blob-storage) to learn how to set up your bucket with the bucket_url and credentials. For Azure, the Snowflake loader will use +the filesystem credentials for your Azure Blob Storage container if not specified otherwise (see config options below). Alternatively, you can define an external stage in Snowflake and provide the stage identifier. +Please consult the Snowflake Documentation on [how to create a stage for your Azure Blob Storage Container](https://docs.snowflake.com/en/user-guide/data-load-azure). The basic steps are as follows: * Create a storage integration linked to Azure Blob Storage and the right container -* Grant access to this storage integration to the snowflake role you are using to load the data into snowflake. +* Grant access to this storage integration to the Snowflake role you are using to load the data into Snowflake. * Create a stage from this storage integration in the PUBLIC namespace, or the namespace of the schema of your data. -* Also grant access to this stage for the role you are using to load data into snowflake. +* Also grant access to this stage for the role you are using to load data into Snowflake. * Provide the name of your stage (including the namespace) to dlt like so: ```toml [destination] -stage_name=PUBLIC.my_azure_stage +stage_name="PUBLIC.my_azure_stage" ``` -To run Snowflake with azure as staging destination: +To run Snowflake with Azure as the staging destination: -```python +```py # Create a dlt pipeline that will load -# chess player data to the snowflake destination -# via staging on azure +# chess player data to the Snowflake destination +# via staging on Azure pipeline = dlt.pipeline( pipeline_name='chess_pipeline', destination='snowflake', @@ -241,7 +240,7 @@ pipeline = dlt.pipeline( ``` ## Additional destination options -You can define your own stage to PUT files and disable removing of the staged files after loading. +You can define your own stage to PUT files and disable the removal of the staged files after loading. ```toml [destination.snowflake] # Use an existing named stage instead of the default. 
Default uses the implicit table stage per table @@ -251,7 +250,7 @@ keep_staged_files=true ``` ### dbt support -This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-snowflake](https://github.com/dbt-labs/dbt-snowflake). Both password and key pair authentication is supported and shared with dbt runners. +This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-snowflake](https://github.com/dbt-labs/dbt-snowflake). Both password and key pair authentication are supported and shared with dbt runners. ### Syncing of `dlt` state This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination) @@ -266,4 +265,4 @@ This destination fully supports [dlt state sync](../../general-usage/state#synci - [Load data from HubSpot to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-snowflake) - [Load data from Chess.com to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-snowflake) - [Load data from Google Sheets to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-snowflake) - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/synapse.md b/docs/website/docs/dlt-ecosystem/destinations/synapse.md index 6ace1ac5a8..d803b88a2c 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/synapse.md +++ b/docs/website/docs/dlt-ecosystem/destinations/synapse.md @@ -8,7 +8,7 @@ keywords: [synapse, destination, data warehouse] ## Install dlt with Synapse **To install the DLT library with Synapse dependencies:** -``` +```sh pip install dlt[synapse] ``` @@ -18,13 +18,13 @@ pip install dlt[synapse] * **Microsoft ODBC Driver for SQL Server** - _Microsoft ODBC Driver for SQL Server_ must be installed to use this destination. + The _Microsoft ODBC Driver for SQL Server_ must be installed to use this destination. This can't be included with `dlt`'s python dependencies, so you must install it separately on your system. You can find the official installation instructions [here](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16). Supported driver versions: * `ODBC Driver 18 for SQL Server` - > 💡 Older driver versions don't properly work, because they don't support the `LongAsMax` keyword that got [introduced](https://learn.microsoft.com/en-us/sql/connect/odbc/windows/features-of-the-microsoft-odbc-driver-for-sql-server-on-windows?view=sql-server-ver15#microsoft-odbc-driver-180-for-sql-server-on-windows) in `ODBC Driver 18 for SQL Server`. Synapse does not support the legacy ["long data types"](https://learn.microsoft.com/en-us/sql/t-sql/data-types/ntext-text-and-image-transact-sql), and requires "max data types" instead. `dlt` uses the `LongAsMax` keyword to automatically do the conversion. + > 💡 Older driver versions don't work properly because they don't support the `LongAsMax` keyword that was [introduced](https://learn.microsoft.com/en-us/sql/connect/odbc/windows/features-of-the-microsoft-odbc-driver-for-sql-server-on-windows?view=sql-server-ver15#microsoft-odbc-driver-180-for-sql-server-on-windows) in `ODBC Driver 18 for SQL Server`. Synapse does not support the legacy ["long data types"](https://learn.microsoft.com/en-us/sql/t-sql/data-types/ntext-text-and-image-transact-sql), and requires "max data types" instead. 
`dlt` uses the `LongAsMax` keyword to automatically do the conversion. * **Azure Synapse Workspace and dedicated SQL pool** You need an Azure Synapse workspace with a dedicated SQL pool to load data into. If you don't have one yet, you can use this [quickstart](https://learn.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-sql-pool-studio). @@ -32,12 +32,12 @@ pip install dlt[synapse] ### Steps **1. Initialize a project with a pipeline that loads to Synapse by running** -``` +```sh dlt init chess synapse ``` **2. Install the necessary dependencies for Synapse by running** -``` +```sh pip install -r requirements.txt ``` This will install `dlt` with the **synapse** extra that contains all dependencies required for the Synapse destination. @@ -67,7 +67,7 @@ GRANT ADMINISTER DATABASE BULK OPERATIONS TO loader; -- only required when loadi Optionally, you can create a `WORKLOAD GROUP` and add the `loader` user as a member to manage [workload isolation](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-workload-isolation). See the [instructions](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql/data-loading-best-practices#create-a-loading-user) on setting up a loader user for an example of how to do this. -**3. Enter your credentials into `.dlt/secrets.toml`.** +**4. Enter your credentials into `.dlt/secrets.toml`.** Example, replace with your database connection info: ```toml @@ -86,7 +86,7 @@ destination.synapse.credentials = "synapse://loader:your_loader_password@your_sy ``` To pass credentials directly you can use the `credentials` argument of `dlt.destinations.synapse(...)`: -```python +```py pipeline = dlt.pipeline( pipeline_name='chess', destination=dlt.destinations.synapse( @@ -97,7 +97,7 @@ pipeline = dlt.pipeline( ``` ## Write disposition -All write dispositions are supported +All write dispositions are supported. If you set the [`replace` strategy](../../general-usage/full-loading.md) to `staging-optimized`, the destination tables will be dropped and replaced by the staging tables with an `ALTER SCHEMA ... TRANSFER` command. Please note that this operation is **not** atomic—it involves multiple DDL commands and Synapse does not support DDL transactions. @@ -117,7 +117,7 @@ Data is loaded via `INSERT` statements by default. ## Table index type The [table index type](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index) of the created tables can be configured at the resource level with the `synapse_adapter`: -```python +```py info = pipeline.run( synapse_adapter( data=your_resource, @@ -134,12 +134,11 @@ Possible values: > ❗ Important: >* **Set `default_table_index_type` to `"clustered_columnstore_index"` if you want to change the default** (see [additional destination options](#additional-destination-options)). >* **CLUSTERED COLUMNSTORE INDEX tables do not support the `varchar(max)`, `nvarchar(max)`, and `varbinary(max)` data types.** If you don't specify the `precision` for columns that map to any of these types, `dlt` will use the maximum lengths `varchar(4000)`, `nvarchar(4000)`, and `varbinary(8000)`. ->* **While Synapse creates CLUSTERED COLUMNSTORE INDEXES by default, `dlt` creates HEAP tables by default.** HEAP is a more robust choice, because it supports all data types and doesn't require conversions. 
->* **When using the `insert-from-staging` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are always created as HEAP tables**—any configuration of the table index types is ignored. The HEAP strategy makes sense - for staging tables for reasons explained [here](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables). ->* **When using the `staging-optimized` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are already created with the configured table index type**, because the staging table becomes the final table. ->* **`dlt` system tables are always created as HEAP tables, regardless of any configuration.** This is in line with Microsoft's recommendation that "for small lookup tables, less than 60 million rows, consider using HEAP or clustered index for faster query performance." ->* Child tables, if any, inherent the table index type of their parent table. +>* **While Synapse creates CLUSTERED COLUMNSTORE INDEXES by default, `dlt` creates HEAP tables by default.** HEAP is a more robust choice because it supports all data types and doesn't require conversions. +>* **When using the `insert-from-staging` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are always created as HEAP tables**—any configuration of the table index types is ignored. The HEAP strategy makes sense for staging tables for reasons explained [here](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-tables-index#heap-tables). +>* **When using the `staging-optimized` [`replace` strategy](../../general-usage/full-loading.md), the staging tables are already created with the configured table index type**, because the staging table becomes the final table. +>* **`dlt` system tables are always created as HEAP tables, regardless of any configuration.** This is in line with Microsoft's recommendation that "for small lookup tables, less than 60 million rows, consider using HEAP or clustered index for faster query performance." +>* Child tables, if any, inherit the table index type of their parent table. ## Supported column hints @@ -148,7 +147,7 @@ Synapse supports the following [column hints](https://dlthub.com/docs/general-us * `primary_key` - creates a `PRIMARY KEY NONCLUSTERED NOT ENFORCED` constraint on the column * `unique` - creates a `UNIQUE NOT ENFORCED` constraint on the column -> ❗ These hints are **disabled by default**. This is because the `PRIMARY KEY` and `UNIQUE` [constraints](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints) are tricky in Synapse: they are **not enforced** and can lead to innacurate results if the user does not ensure all column values are unique. For the column hints to take effect, the `create_indexes` configuration needs to be set to `True`, see [additional destination options](#additional-destination-options). +> ❗ These hints are **disabled by default**. This is because the `PRIMARY KEY` and `UNIQUE` [constraints](https://learn.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-table-constraints) are tricky in Synapse: they are **not enforced** and can lead to inaccurate results if the user does not ensure all column values are unique. For the column hints to take effect, the `create_indexes` configuration needs to be set to `True`, see [additional destination options](#additional-destination-options). 
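To see how these settings interact, below is a minimal, illustrative sketch (an editorial addition, not part of the patch above). It assumes `synapse_adapter` can be imported from `dlt.destinations.adapters`, mirroring the `weaviate_adapter` import shown later in this diff, and that Synapse credentials are already configured in `.dlt/secrets.toml`. The resource, pipeline, and dataset names are made up, and `create_indexes` is switched on through the environment-variable form of the `[destination.synapse]` option so that the `primary_key` hint actually takes effect:

```py
import os
import dlt
from dlt.destinations.adapters import synapse_adapter  # assumed import path

# Equivalent to create_indexes=true under [destination.synapse] in config.toml;
# without it the primary_key hint below is ignored (hints are disabled by default).
os.environ["DESTINATION__SYNAPSE__CREATE_INDEXES"] = "true"

@dlt.resource(primary_key="customer_id")  # emitted as PRIMARY KEY NONCLUSTERED NOT ENFORCED
def customers():
    yield [{"customer_id": 1, "name": "Alice"}, {"customer_id": 2, "name": "Bob"}]

pipeline = dlt.pipeline(
    pipeline_name="synapse_hints_demo",
    destination="synapse",
    dataset_name="demo_data",
)

# Request a clustered columnstore index for this table instead of the default HEAP.
info = pipeline.run(synapse_adapter(customers, table_index_type="clustered_columnstore_index"))
print(info)
```

Keep the caveat above in mind: clustered columnstore tables cannot hold the `varchar(max)`-style types, so this combination only makes sense when the column precisions stay within the capped lengths.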
## Staging support Synapse supports Azure Blob Storage (both standard and [ADLS Gen2](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction)) as a file staging destination. `dlt` first uploads Parquet files to the blob container, and then instructs Synapse to read the Parquet file and load its data into a Synapse table using the [COPY INTO](https://learn.microsoft.com/en-us/sql/t-sql/statements/copy-into-transact-sql) statement. @@ -157,7 +156,7 @@ Please refer to the [Azure Blob Storage filesystem documentation](./filesystem.m To run Synapse with staging on Azure Blob Storage: -```python +```py # Create a dlt pipeline that will load # chess player data to the snowflake destination # via staging on Azure Blob Storage @@ -190,9 +189,9 @@ destination.synapse.credentials = "synapse://loader:your_loader_password@your_sy ``` Descriptions: -- `default_table_index_type` sets the [table index type](#table-index-type) that is used if no table index type is specified on the resource. +- `default_table_index_type` sets the [table index type](#table-index-type) that is used if no table index type is specified on the resource. - `create_indexes` determines if `primary_key` and `unique` [column hints](#supported-column-hints) are applied. -- `staging_use_msi` determines if the Managed Identity of the Synapse workspace is used to authorize access to the [staging](#staging-support) Storage Account. Ensure the Managed Identity has the [Storage Blob Data Reader](https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles#storage-blob-data-reader) role (or a higher-priviliged role) assigned on the blob container if you set this option to `"true"`. +- `staging_use_msi` determines if the Managed Identity of the Synapse workspace is used to authorize access to the [staging](#staging-support) Storage Account. Ensure the Managed Identity has the [Storage Blob Data Reader](https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles#storage-blob-data-reader) role (or a higher-privileged role) assigned on the blob container if you set this option to `"true"`. - `port` used for the ODBC connection. - `connect_timeout` sets the timeout for the `pyodbc` connection attempt, in seconds. @@ -212,4 +211,4 @@ This destination fully supports [dlt state sync](../../general-usage/state#synci - [Load data from GitHub to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-synapse) - [Load data from Stripe to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-synapse) - [Load data from Chess.com to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-synapse) - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md index 2ec09e9c24..fb87ccfa6f 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md +++ b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md @@ -6,14 +6,14 @@ keywords: [weaviate, vector database, destination, dlt] # Weaviate -[Weaviate](https://weaviate.io/) is an open source vector database. It allows you to store data objects and perform similarity searches over them. -This destination helps you to load data into Weaviate from [dlt resources](../../general-usage/resource.md). 
+[Weaviate](https://weaviate.io/) is an open-source vector database. It allows you to store data objects and perform similarity searches over them. +This destination helps you load data into Weaviate from [dlt resources](../../general-usage/resource.md). ## Setup Guide 1. To use Weaviate as a destination, make sure dlt is installed with the 'weaviate' extra: -```bash +```sh pip install dlt[weaviate] ``` @@ -30,18 +30,18 @@ X-OpenAI-Api-Key = "your-openai-api-key" In this setup guide, we are using the [Weaviate Cloud Services](https://console.weaviate.cloud/) to get a Weaviate instance and [OpenAI API](https://platform.openai.com/) for generating embeddings through the [text2vec-openai](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-openai) module. -You can host your own weaviate instance using docker compose, kubernetes or embedded. Refer to Weaviate's [How-to: Install](https://weaviate.io/developers/weaviate/installation) or [dlt recipe we use for our tests](#run-weaviate-fully-standalone). In that case you can skip the credentials part altogether: +You can host your own Weaviate instance using Docker Compose, Kubernetes, or embedded. Refer to Weaviate's [How-to: Install](https://weaviate.io/developers/weaviate/installation) or [dlt recipe we use for our tests](#run-weaviate-fully-standalone). In that case, you can skip the credentials part altogether: ```toml [destination.weaviate.credentials.additional_headers] X-OpenAI-Api-Key = "your-openai-api-key" ``` -The `url` will default to **http://localhost:8080** and `api_key` is not defined - which are the defaults for Weaviate container. +The `url` will default to **http://localhost:8080** and `api_key` is not defined - which are the defaults for the Weaviate container. 3. Define the source of the data. For starters, let's load some data from a simple data structure: -```python +```py import dlt from dlt.destinations.adapters import weaviate_adapter @@ -63,7 +63,7 @@ movies = [ 4. Define the pipeline: -```python +```py pipeline = dlt.pipeline( pipeline_name="movies", destination="weaviate", @@ -73,7 +73,7 @@ pipeline = dlt.pipeline( 5. Run the pipeline: -```python +```py info = pipeline.run( weaviate_adapter( movies, @@ -84,7 +84,7 @@ info = pipeline.run( 6. Check the results: -```python +```py print(info) ``` @@ -96,12 +96,12 @@ Weaviate destination is different from other [dlt destinations](../destinations/ The `weaviate_adapter` is a helper function that configures the resource for the Weaviate destination: -```python +```py weaviate_adapter(data, vectorize, tokenization) ``` It accepts the following arguments: -- `data`: a dlt resource object or a Python data structure (e.g. a list of dictionaries). +- `data`: a dlt resource object or a Python data structure (e.g., a list of dictionaries). - `vectorize`: a name of the field or a list of names that should be vectorized by Weaviate. - `tokenization`: the dictionary containing the tokenization configuration for a field. The dictionary should have the following structure `{'field_name': 'method'}`. Valid methods are "word", "lowercase", "whitespace", "field". The default is "word". See [Property tokenization](https://weaviate.io/developers/weaviate/config-refs/schema#property-tokenization) in Weaviate documentation for more details. 
@@ -109,7 +109,7 @@ Returns: a [dlt resource](../../general-usage/resource.md) object that you can p Example: -```python +```py weaviate_adapter( resource, vectorize=["title", "description"], @@ -133,7 +133,7 @@ The [replace](../../general-usage/full-loading.md) disposition replaces the data In the movie example from the [setup guide](#setup-guide), we can use the `replace` disposition to reload the data every time we run the pipeline: -```python +```py info = pipeline.run( weaviate_adapter( movies, @@ -146,9 +146,9 @@ info = pipeline.run( ### Merge The [merge](../../general-usage/incremental-loading.md) write disposition merges the data from the resource with the data in the destination. -For `merge` disposition you would need to specify a `primary_key` for the resource: +For the `merge` disposition, you would need to specify a `primary_key` for the resource: -```python +```py info = pipeline.run( weaviate_adapter( movies, @@ -159,18 +159,18 @@ info = pipeline.run( ) ``` -Internally dlt will use `primary_key` (`document_id` in the example above) to generate a unique identifier ([UUID](https://weaviate.io/developers/weaviate/manage-data/create#id)) for each object in Weaviate. If the object with the same UUID already exists in Weaviate, it will be updated with the new data. Otherwise, a new object will be created. +Internally, dlt will use `primary_key` (`document_id` in the example above) to generate a unique identifier ([UUID](https://weaviate.io/developers/weaviate/manage-data/create#id)) for each object in Weaviate. If the object with the same UUID already exists in Weaviate, it will be updated with the new data. Otherwise, a new object will be created. :::caution -If you are using the merge write disposition, you must set it from the first run of your pipeline, otherwise the data will be duplicated in the database on subsequent loads. +If you are using the `merge` write disposition, you must set it from the first run of your pipeline; otherwise, the data will be duplicated in the database on subsequent loads. ::: ### Append -This is the default disposition. It will append the data to the existing data in the destination ignoring the `primary_key` field. +This is the default disposition. It will append the data to the existing data in the destination, ignoring the `primary_key` field. ## Data loading @@ -199,11 +199,11 @@ Weaviate uses classes to categorize and identify data. To avoid potential naming For example, if you have a dataset named `movies_dataset` and a table named `actors`, the Weaviate class name would be `MoviesDataset_Actors` (the default separator is an underscore). -However, if you prefer to have class names without the dataset prefix, skip `dataset_name` argument. +However, if you prefer to have class names without the dataset prefix, skip the `dataset_name` argument. For example: -```python +```py pipeline = dlt.pipeline( pipeline_name="movies", destination="weaviate", @@ -241,15 +241,15 @@ The default naming convention described above will preserve the casing of the pr in Weaviate but also requires that your input data does not have clashing property names when comparing case insensitive ie. (`caseName` == `casename`). In such case Weaviate destination will fail to create classes and report a conflict. -You can configure alternative naming convention which will lowercase all properties. The clashing properties will be merged and the classes created. 
Still if you have a document where clashing properties like: +You can configure an alternative naming convention which will lowercase all properties. The clashing properties will be merged and the classes created. Still, if you have a document where clashing properties like: ```json {"camelCase": 1, "CamelCase": 2} ``` it will be normalized to: -``` +```json {"camelcase": 2} ``` -so your best course of action is to clean up the data yourself before loading and use default naming convention. Nevertheless you can configure the alternative in `config.toml`: +so your best course of action is to clean up the data yourself before loading and use the default naming convention. Nevertheless, you can configure the alternative in `config.toml`: ```toml [schema] naming="dlt.destinations.weaviate.impl.ci_naming" @@ -291,12 +291,12 @@ Below is an example that configures the **contextionary** vectorizer. You can pu vectorizer="text2vec-contextionary" module_config={text2vec-contextionary = { vectorizeClassName = false, vectorizePropertyName = true}} ``` -You can find docker composer with the instructions to run [here](https://github.com/dlt-hub/dlt/tree/devel/dlt/destinations/weaviate/README.md) +You can find Docker Compose with the instructions to run [here](https://github.com/dlt-hub/dlt/tree/devel/dlt/destinations/weaviate/README.md) ### dbt support -Currently Weaviate destination does not support dbt. +Currently, Weaviate destination does not support dbt. ### Syncing of `dlt` state @@ -304,4 +304,4 @@ Weaviate destination supports syncing of the `dlt` state. - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/file-formats/insert-format.md b/docs/website/docs/dlt-ecosystem/file-formats/insert-format.md index a6d9fe78b6..641be9a106 100644 --- a/docs/website/docs/dlt-ecosystem/file-formats/insert-format.md +++ b/docs/website/docs/dlt-ecosystem/file-formats/insert-format.md @@ -4,30 +4,27 @@ description: The INSERT file format keywords: [insert values, file formats] --- -# SQL INSERT file format +# SQL INSERT File Format -This file format contains an INSERT...VALUES statement to be executed on the destination during the -`load` stage. +This file format contains an INSERT...VALUES statement to be executed on the destination during the `load` stage. Additional data types are stored as follows: -- `datetime` and `date` as ISO strings; -- `decimal` as text representation of decimal number; -- `binary` depends on the format accepted by the destination; -- `complex` depends on the format accepted by the destination. +- `datetime` and `date` are stored as ISO strings; +- `decimal` is stored as a text representation of a decimal number; +- `binary` storage depends on the format accepted by the destination; +- `complex` storage also depends on the format accepted by the destination. -This file format is -[compressed](../../reference/performance.md#disabling-and-enabling-file-compression) by default. +This file format is [compressed](../../reference/performance.md#disabling-and-enabling-file-compression) by default. -## Supported destinations +## Supported Destinations -Used by default by: **DuckDB**, **Postgres**, **Redshift**. +This format is used by default by: **DuckDB**, **Postgres**, **Redshift**. -Supported by: **filesystem**. +It is also supported by: **filesystem**. 
-By setting the `loader_file_format` argument to `insert_values` in the run command, the pipeline -will store your data in the INSERT format to the destination: +By setting the `loader_file_format` argument to `insert_values` in the run command, the pipeline will store your data in the INSERT format at the destination: -```python +```py info = pipeline.run(some_source(), loader_file_format="insert_values") ``` diff --git a/docs/website/docs/dlt-ecosystem/file-formats/jsonl.md b/docs/website/docs/dlt-ecosystem/file-formats/jsonl.md index 34f636f88d..7467c6f639 100644 --- a/docs/website/docs/dlt-ecosystem/file-formats/jsonl.md +++ b/docs/website/docs/dlt-ecosystem/file-formats/jsonl.md @@ -4,29 +4,29 @@ description: The jsonl file format keywords: [jsonl, file formats] --- -# jsonl - JSON delimited +# jsonl - JSON Delimited -`JSON delimited` is a file format that stores several `JSON` documents in one file. The `JSON` +JSON Delimited is a file format that stores several JSON documents in one file. The JSON documents are separated by a new line. Additional data types are stored as follows: -- `datetime` and `date` as ISO strings; -- `decimal` as text representation of decimal number; -- `binary` is base64 encoded string; -- `HexBytes` is hex encoded string; +- `datetime` and `date` are stored as ISO strings; +- `decimal` is stored as a text representation of a decimal number; +- `binary` is stored as a base64 encoded string; +- `HexBytes` is stored as a hex encoded string; - `complex` is serialized as a string. This file format is [compressed](../../reference/performance.md#disabling-and-enabling-file-compression) by default. -## Supported destinations +## Supported Destinations -Used by default by: **BigQuery**, **Snowflake**, **filesystem**. +This format is used by default by: **BigQuery**, **Snowflake**, **filesystem**. By setting the `loader_file_format` argument to `jsonl` in the run command, the pipeline will store -your data in the jsonl format to the destination: +your data in the jsonl format at the destination: -```python +```py info = pipeline.run(some_source(), loader_file_format="jsonl") ``` diff --git a/docs/website/docs/dlt-ecosystem/file-formats/parquet.md b/docs/website/docs/dlt-ecosystem/file-formats/parquet.md index 4b0f63d22b..94aaaf4884 100644 --- a/docs/website/docs/dlt-ecosystem/file-formats/parquet.md +++ b/docs/website/docs/dlt-ecosystem/file-formats/parquet.md @@ -4,52 +4,41 @@ description: The parquet file format keywords: [parquet, file formats] --- -# Parquet file format +# Parquet File Format -[Apache Parquet](https://en.wikipedia.org/wiki/Apache_Parquet) is a free and open-source -column-oriented data storage format in the Apache Hadoop ecosystem. `dlt` is able to store data in -this format when configured to do so. +[Apache Parquet](https://en.wikipedia.org/wiki/Apache_Parquet) is a free and open-source column-oriented data storage format in the Apache Hadoop ecosystem. `dlt` is capable of storing data in this format when configured to do so. -To use this format you need a `pyarrow` package. You can get this package as a `dlt` extra as well: +To use this format, you need a `pyarrow` package. 
You can get this package as a `dlt` extra as well: ```sh pip install dlt[parquet] ``` -## Supported destinations +## Supported Destinations Supported by: **BigQuery**, **DuckDB**, **Snowflake**, **filesystem**, **Athena** -By setting the `loader_file_format` argument to `parquet` in the run command, the pipeline will -store your data in the parquet format to the destination: +By setting the `loader_file_format` argument to `parquet` in the run command, the pipeline will store your data in the parquet format at the destination: -```python +```py info = pipeline.run(some_source(), loader_file_format="parquet") ``` ## Destination AutoConfig -`dlt` uses [destination capabilities](../../walkthroughs/create-new-destination.md#3-set-the-destination-capabilities) to configure parquet writer: -* uses decimal and wei precision to pick the right **decimal type** and sets precision and scale -* uses timestamp precision to pick right **timestamp type** resolution (seconds, micro or nano) +`dlt` uses [destination capabilities](../../walkthroughs/create-new-destination.md#3-set-the-destination-capabilities) to configure the parquet writer: +* It uses decimal and wei precision to pick the right **decimal type** and sets precision and scale. +* It uses timestamp precision to pick the right **timestamp type** resolution (seconds, micro, or nano). ## Options -Under the hood `dlt` uses the -[pyarrow parquet writer](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html) -to create the files. The following options can be used to change the behavior of the writer: +Under the hood, `dlt` uses the [pyarrow parquet writer](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html) to create the files. The following options can be used to change the behavior of the writer: -- `flavor`: Sanitize schema or set other compatibility options to work with various target systems. - Defaults to "spark". -- `version`: Determine which Parquet logical types are available for use, whether the reduced set - from the Parquet 1.x.x format or the expanded logical types added in later format versions. - Defaults to "2.4". -- `data_page_size`: Set a target threshold for the approximate encoded size of data pages within a - column chunk (in bytes). Defaults to "1048576". -- `timestamp_timezone`: A string specifying timezone, default is UTC +- `flavor`: Sanitize schema or set other compatibility options to work with various target systems. Defaults to "spark". +- `version`: Determine which Parquet logical types are available for use, whether the reduced set from the Parquet 1.x.x format or the expanded logical types added in later format versions. Defaults to "2.4". +- `data_page_size`: Set a target threshold for the approximate encoded size of data pages within a column chunk (in bytes). Defaults to "1048576". +- `timestamp_timezone`: A string specifying timezone, default is UTC. -Read the -[pyarrow parquet docs](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html) -to learn more about these settings. +Read the [pyarrow parquet docs](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html) to learn more about these settings. 
Example: @@ -62,9 +51,9 @@ data_page_size=1048576 timestamp_timezone="Europe/Berlin" ``` -or using environment variables: +Or using environment variables: -``` +```sh NORMALIZE__DATA_WRITER__FLAVOR NORMALIZE__DATA_WRITER__VERSION NORMALIZE__DATA_WRITER__DATA_PAGE_SIZE diff --git a/docs/website/docs/dlt-ecosystem/staging.md b/docs/website/docs/dlt-ecosystem/staging.md index d2ed03a2a2..e3a60dfa51 100644 --- a/docs/website/docs/dlt-ecosystem/staging.md +++ b/docs/website/docs/dlt-ecosystem/staging.md @@ -48,7 +48,7 @@ In essence, you need to set up two destinations and then pass them to `dlt.pipel 4. **Chain staging to destination and request `parquet` file format.** Pass the `staging` argument to `dlt.pipeline`. It works like the destination `argument`: - ```python + ```py # Create a dlt pipeline that will load # chess player data to the redshift destination # via staging on s3 @@ -60,7 +60,7 @@ In essence, you need to set up two destinations and then pass them to `dlt.pipel ) ``` `dlt` will automatically select an appropriate loader file format for the staging files. Below we explicitly specify `parquet` file format (just to demonstrate how to do it): - ```python + ```py info = pipeline.run(chess(), loader_file_format="parquet") ``` diff --git a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md index b2b6b27fc3..42f31d4875 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md +++ b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt.md @@ -6,34 +6,34 @@ keywords: [transform, dbt, runner] # Transform the data with dbt -[dbt](https://github.com/dbt-labs/dbt-core) is a framework that allows simple structuring of your transformations into DAGs. The benefits of +[dbt](https://github.com/dbt-labs/dbt-core) is a framework that allows for the simple structuring of your transformations into DAGs. The benefits of using dbt include: - End-to-end cross-db compatibility for dlt→dbt pipelines. -- Easy to use by SQL analysts, low learning curve. -- Highly flexible and configurable in usage, supports templating, can run backfills etc. -- Supports testing and accelerates troubleshooting. +- Ease of use by SQL analysts, with a low learning curve. +- High flexibility and configurability in usage, supports templating, can run backfills, etc. +- Support for testing and accelerated troubleshooting. ## dbt runner in dlt You can run dbt with `dlt` by using the dbt runner. -The dbt runner +The dbt runner: -- can create a virtual env for dbt on the fly; -- can run a dbt package from online (e.g. GitHub) or from local files; -- passes configuration and credentials to dbt, so you do not need to handle them separately from +- Can create a virtual env for dbt on the fly; +- Can run a dbt package from online sources (e.g., GitHub) or from local files; +- Passes configuration and credentials to dbt, so you do not need to handle them separately from `dlt`, enabling dbt to configure on the fly. ## How to use the dbt runner For an example of how to use the dbt runner, see the [jaffle shop example](https://github.com/dlt-hub/dlt/blob/devel/docs/examples/archive/dbt_run_jaffle.py). -Included below in another example where we run a `dlt` pipeline and then a dbt package via `dlt`: +Included below is another example where we run a `dlt` pipeline and then a dbt package via `dlt`: > 💡 Docstrings are available to read in your IDE. 
-```python +```py # load all pipedrive endpoints to pipedrive_raw dataset pipeline = dlt.pipeline( pipeline_name='pipedrive', @@ -81,7 +81,7 @@ for m in models: ``` ## How to run dbt runner without pipeline -You can use dbt runner without dlt pipeline. Example below will clone and run **jaffle shop** using a dbt profile that you supply. +You can use the dbt runner without a dlt pipeline. The example below will clone and run **jaffle shop** using a dbt profile that you supply. It assumes that dbt is installed in the current Python environment and the `profile.yml` is in the same folder as the Python script. ```py @@ -102,7 +102,7 @@ models = runner.run_all() ``` -Here's example **duckdb** profile +Here's an example **duckdb** profile ```yaml config: # do not track usage, do not create .user.yml @@ -128,4 +128,4 @@ If you want to transform the data before loading, you can use Python. If you wan data after loading, you can use dbt or one of the following: 1. [`dlt` SQL client.](../sql.md) -1. [Pandas.](../pandas.md) +2. [Pandas.](../pandas.md) diff --git a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md index 1f658e4f95..d15c4eb84c 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md +++ b/docs/website/docs/dlt-ecosystem/transformations/dbt/dbt_cloud.md @@ -1,5 +1,5 @@ --- -title: Transforming the data with dbt Cloud +title: Transforming the Data with dbt Cloud description: Transforming the data loaded by a dlt pipeline with dbt Cloud keywords: [transform, sql] --- @@ -9,9 +9,9 @@ keywords: [transform, sql] ## API Client The DBT Cloud Client is a Python class designed to interact with the dbt Cloud API (version 2). -It provides methods to perform various operations on dbt Cloud, such as triggering job runs and retrieving job run status. +It provides methods to perform various operations on dbt Cloud, such as triggering job runs and retrieving job run statuses. -```python +```py from dlt.helpers.dbt_cloud import DBTCloudClientV2 # Initialize the client @@ -26,7 +26,7 @@ run_status = client.get_run_status(run_id=job_run_id) print(f"Job run status: {run_status['status_humanized']}") ``` -## Helper functions +## Helper Functions These Python functions provide an interface to interact with the dbt Cloud API. They simplify the process of triggering and monitoring job runs in dbt Cloud. @@ -36,7 +36,7 @@ They simplify the process of triggering and monitoring job runs in dbt Cloud. This function triggers a job run in dbt Cloud using the specified configuration. It supports various customization options and allows for monitoring the job's status. -```python +```py from dlt.helpers.dbt_cloud import run_dbt_cloud_job # Trigger a job run with default configuration @@ -53,19 +53,19 @@ status = run_dbt_cloud_job(job_id=1234, data=additional_data, wait_for_outcome=T ### `get_dbt_cloud_run_status()` -If you have already started job run and have a run ID, then you can use the `get_dbt_cloud_run_status` function. +If you have already started a job run and have a run ID, then you can use the `get_dbt_cloud_run_status` function. This function retrieves the full information about a specific dbt Cloud job run. It also supports options for waiting until the run is complete. 
-```python +```py from dlt.helpers.dbt_cloud import get_dbt_cloud_run_status # Retrieve status for a specific run status = get_dbt_cloud_run_status(run_id=1234, wait_for_outcome=True) ``` -## Set credentials +## Set Credentials ### secrets.toml @@ -74,29 +74,29 @@ When using a dlt locally, we recommend using the `.dlt/secrets.toml` method to s If you used the `dlt init` command, then the `.dlt` folder has already been created. Otherwise, create a `.dlt` folder in your working directory and a `secrets.toml` file inside it. -It's where you store sensitive information securely, like access tokens. Keep this file safe. +This is where you store sensitive information securely, like access tokens. Keep this file safe. Use the following format for dbt Cloud API authentication: ```toml [dbt_cloud] api_token = "set me up!" # required for authentication -account_id = "set me up!" # required for both helpers function -job_id = "set me up!" # optional only for run_dbt_cloud_job function (you can pass this explicitly as an argument to the function) -run_id = "set me up!" # optional for get_dbt_cloud_run_status (you can pass this explicitly as an argument to the function) +account_id = "set me up!" # required for both helper functions +job_id = "set me up!" # optional only for the run_dbt_cloud_job function (you can pass this explicitly as an argument to the function) +run_id = "set me up!" # optional for the get_dbt_cloud_run_status function (you can pass this explicitly as an argument to the function) ``` -### Environment variables +### Environment Variables -`dlt` supports reading credentials from environment. +`dlt` supports reading credentials from the environment. If dlt tries to read this from environment variables, it will use a different naming convention. -For environment variables all names are capitalized and sections are separated with double underscore "__". +For environment variables, all names are capitalized and sections are separated with a double underscore "__". -For example, for the above secrets, we would need to put into environment: +For example, for the above secrets, we would need to put into the environment: -``` +```sh DBT_CLOUD__API_TOKEN DBT_CLOUD__ACCOUNT_ID DBT_CLOUD__JOB_ID diff --git a/docs/website/docs/dlt-ecosystem/transformations/pandas.md b/docs/website/docs/dlt-ecosystem/transformations/pandas.md index 6ab98090ba..5a82d8be66 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/pandas.md +++ b/docs/website/docs/dlt-ecosystem/transformations/pandas.md @@ -4,14 +4,14 @@ description: Transform the data loaded by a dlt pipeline with Pandas keywords: [transform, pandas] --- -# Transform the data with Pandas +# Transform the Data with Pandas -You can fetch results of any SQL query as a dataframe. If the destination is supporting that -natively (i.e. BigQuery and DuckDB), `dlt` uses the native method. Thanks to that, reading -dataframes may be really fast! The example below reads GitHub reactions data from the `issues` table and -counts reaction types. +You can fetch the results of any SQL query as a dataframe. If the destination supports that +natively (i.e., BigQuery and DuckDB), `dlt` uses the native method. Thanks to this, reading +dataframes can be really fast! The example below reads GitHub reactions data from the `issues` table and +counts the reaction types. 
-```python +```py pipeline = dlt.pipeline( pipeline_name="github_pipeline", destination="duckdb", @@ -27,15 +27,15 @@ with pipeline.sql_client() as client: counts = reactions.sum(0).sort_values(0, ascending=False) ``` -The `df` method above returns all the data in the cursor as data frame. You can also fetch data in -chunks by passing `chunk_size` argument to the `df` method. +The `df` method above returns all the data in the cursor as a data frame. You can also fetch data in +chunks by passing the `chunk_size` argument to the `df` method. Once your data is in a Pandas dataframe, you can transform it as needed. -## Other transforming tools +## Other Transforming Tools If you want to transform the data before loading, you can use Python. If you want to transform the data after loading, you can use Pandas or one of the following: 1. [dbt.](dbt/dbt.md) (recommended) -1. [`dlt` SQL client.](sql.md) +2. [`dlt` SQL client.](sql.md) diff --git a/docs/website/docs/dlt-ecosystem/transformations/sql.md b/docs/website/docs/dlt-ecosystem/transformations/sql.md index cc1576229b..ad37c61bd8 100644 --- a/docs/website/docs/dlt-ecosystem/transformations/sql.md +++ b/docs/website/docs/dlt-ecosystem/transformations/sql.md @@ -6,36 +6,40 @@ keywords: [transform, sql] # Transform the data using the `dlt` SQL client -A simple alternative to dbt is to query the data using the `dlt` SQL client and then performing the +A simple alternative to dbt is to query the data using the `dlt` SQL client and then perform the transformations using Python. The `execute_sql` method allows you to execute any SQL statement, -including statements that change the database schema or data in the tables. In the example below we -insert a row into `customers` table. Note that the syntax is the same as for any standard `dbapi` +including statements that change the database schema or data in the tables. In the example below, we +insert a row into the `customers` table. Note that the syntax is the same as for any standard `dbapi` connection. -```python +```py pipeline = dlt.pipeline(destination="bigquery", dataset_name="crm") try: with pipeline.sql_client() as client: client.sql_client.execute_sql( - f"INSERT INTO customers VALUES (%s, %s, %s)", + "INSERT INTO customers VALUES (%s, %s, %s)", 10, "Fred", "fred@fred.com" ) +except Exception: + ... ``` -In the case of SELECT queries, the data is returned as a list of row, with the elements of a row +In the case of SELECT queries, the data is returned as a list of rows, with the elements of a row corresponding to selected columns. -```python +```py try: with pipeline.sql_client() as client: res = client.execute_sql( "SELECT id, name, email FROM customers WHERE id = %s", 10 ) - # prints columns values of first row + # prints column values of the first row print(res[0]) +except Exception: + ... ``` ## Other transforming tools @@ -44,4 +48,4 @@ If you want to transform the data before loading, you can use Python. If you wan data after loading, you can use SQL or one of the following: 1. [dbt](dbt/dbt.md) (recommended). -1. [Pandas.](pandas.md) +2. 
[Pandas.](pandas.md) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md b/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md index 0baf1917d1..a920b21a03 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md @@ -45,7 +45,7 @@ Sources and resources that can be loaded using this verified source are: Upon logging into Airtable and accessing your base or table, you'll notice a URL in your browser's address bar resembling: -```bash +```sh https://airtable.com/appve10kl227BIT4GV/tblOUnZVLFWbemTP1/viw3qtF76bRQC3wKx/rec9khXgeTotgCQ62?blocks=hide ``` @@ -67,7 +67,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init airtable duckdb ``` @@ -116,20 +116,20 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python airtable_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -147,13 +147,14 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function retrieves tables from given Airtable base. -```python +```py @dlt.source def airtable_source( base_id: str = dlt.config.value, table_names: Optional[List[str]] = None, access_token: str = dlt.secrets.value, ) -> Iterable[DltResource]: + ... ``` `base_id`: The base's unique identifier. @@ -167,12 +168,13 @@ tables in the schema are loaded. This function retrieves data from a single Airtable table. -```python +```py def airtable_resource( api: pyairtable.Api, base_id: str, table: Dict[str, Any], ) -> DltResource: + ... ``` `table`: Airtable metadata, excluding actual records. @@ -186,7 +188,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="airtable", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -196,16 +198,16 @@ verified source. 1. To load the entire base: - ```python + ```py base_id = "Please set me up!" # The id of the base. - airtables = airtable_source(base_id=base_id)) + airtables = airtable_source(base_id=base_id) load_info = pipeline.run(load_data, write_disposition="replace") ``` 1. To load selected tables from a base table: - ```python + ```py base_id = "Please set me up!" # The id of the base. table_names = ["Table1","Table2"] # A list of table IDs or table names to load. @@ -221,7 +223,7 @@ verified source. 1. To load data and apply hints to a specific column: - ```python + ```py base_id = "Please set me up!" # The id of the base. table_names = ["Table1","Table2"] # A list of table IDs or table names to load. resource_name = "Please set me up!" # The table name we want to apply hints. 
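The hunk above is cut off before the actual hint is applied. As a rough sketch of how such a snippet typically continues — the `resource_name`, `field_name` value and the timestamp hint below are illustrative assumptions, and the import path mirrors `airtable_pipeline.py` rather than this diff:

```py
import dlt
from airtable import airtable_source  # import as used in airtable_pipeline.py

pipeline = dlt.pipeline(
    pipeline_name="airtable", destination="duckdb", dataset_name="airtable_data"
)

base_id = "Please set me up!"       # the id of the base
table_names = ["Table1", "Table2"]  # table IDs or table names to load
resource_name = "Table1"            # the table we want to apply hints to
field_name = "created_at"           # hypothetical column to type as a timestamp

airtables = airtable_source(base_id=base_id, table_names=table_names)
# apply a data type hint to one column of the selected resource
airtables.resources[resource_name].apply_hints(
    columns={field_name: {"data_type": "timestamp"}},
)
load_info = pipeline.run(airtables, write_disposition="replace")
print(load_info)
```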
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md b/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md index 2fb97ff320..2894c15b5e 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md @@ -9,7 +9,7 @@ keywords: [amazon kinesis, verified source] :::info Need help deploying these sources, or figuring out how to run them in your data stack? [Join our Slack community](https://dlthub.com/community) -or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. +or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer, Adrian. ::: [Amazon Kinesis](https://docs.aws.amazon.com/streams/latest/dev/key-concepts.html) is a cloud-based @@ -36,7 +36,7 @@ You can check out our pipeline example ### Grab credentials -To use this verified source you need AWS `Access key` and `Secret access key`, that can be obtained +To use this verified source, you need an AWS `Access key` and `Secret access key`, which can be obtained as follows: 1. Sign in to your AWS Management Console. @@ -57,7 +57,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init kinesis duckdb ``` @@ -110,19 +110,19 @@ For more information, read [Credentials](../../general-usage/credentials). 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python kinesis_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` - For example, the `pipeline_name` for the above pipeline example is `kinesis_pipeline`, you may + For example, the `pipeline_name` for the above pipeline example is `kinesis_pipeline`. You may also use any custom name instead. For more information, read [Run a pipeline.](../../walkthroughs/run-a-pipeline) @@ -138,7 +138,7 @@ This resource reads a Kinesis stream and yields messages. It supports [incremental loading](../../general-usage/incremental-loading) and parses messages as json by default. -```python +```py @dlt.resource( name=lambda args: args["stream_name"], primary_key="_kinesis_msg_id", @@ -156,6 +156,7 @@ def kinesis_stream( parse_json: bool = True, chunk_size: int = 1000, ) -> Iterable[TDataItem]: + ... ``` `stream_name`: Name of the Kinesis stream. Defaults to config/secrets if unspecified. @@ -178,7 +179,7 @@ def kinesis_stream( You create a resource `kinesis_stream` by passing the stream name and a few other options. The resource will have the same name as the stream. When you iterate this resource (or pass it to -`pipeline.run` records) it will query Kinesis for all the shards in the requested stream. For each +`pipeline.run` records), it will query Kinesis for all the shards in the requested stream. For each shard, it will create an iterator to read messages: 1. If `initial_at_timestamp` is present, the resource will read all messages after this timestamp. @@ -192,7 +193,7 @@ will load messages incrementally: 1. For shards that didn't have messages (or new shards), the last run time is used to get messages. 
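A minimal sketch of the behavior described in the list above — the first run reads from the supplied timestamp, later runs resume from the per-shard state. The stream name is a placeholder and the import path follows `kinesis_pipeline.py`; treat both as assumptions:

```py
import dlt
from dlt.common import pendulum
from kinesis import kinesis_stream  # import as used in kinesis_pipeline.py

pipeline = dlt.pipeline(
    pipeline_name="kinesis_pipeline", destination="duckdb", dataset_name="kinesis"
)

# first run: read all messages newer than three hours ago;
# the resource takes its name from the stream
my_stream = kinesis_stream(
    "my-kinesis-stream",  # hypothetical stream name
    initial_at_timestamp=pendulum.now().subtract(hours=3),
)
print(pipeline.run(my_stream))

# subsequent runs of the same script resume from the sequence numbers stored
# in the pipeline state, so only new messages are loaded
```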
Please check the `kinesis_stream` [docstring](https://github.com/dlt-hub/verified-sources/blob/master/sources/kinesis/__init__.py#L31-L46) -for additional options, i.e. to limit the number of messages +for additional options, i.e., to limit the number of messages returned or to automatically parse JSON messages. ### Kinesis message format @@ -212,7 +213,7 @@ verified source. 1. Configure the [pipeline](../../general-usage/pipeline) by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="kinesis_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -220,9 +221,9 @@ verified source. ) ``` -1. To load messages from a stream from last one hour: +1. To load messages from a stream from the last one hour: - ```python + ```py # the resource below will take its name from the stream name, # it can be used multiple times by default it assumes that Data is json and parses it, # here we disable that to just get bytes in data elements of the message @@ -237,7 +238,7 @@ verified source. 1. For incremental Kinesis streams, to fetch only new messages: - ```python + ```py #running pipeline will get only new messages info = pipeline.run(kinesis_stream_data) message_counts = pipeline.last_trace.last_normalize_info.row_counts @@ -249,7 +250,7 @@ verified source. 1. To parse json with a simple decoder: - ```python + ```py def _maybe_parse_json(item: TDataItem) -> TDataItem: try: item.update(json.loadb(item["data"])) @@ -263,7 +264,7 @@ verified source. 1. To read Kinesis messages and send them somewhere without using a pipeline: - ```python + ```py from dlt.common.configuration.container import Container from dlt.common.pipeline import StateInjectableContext diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md b/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md index df968422d7..915a9d297a 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/arrow-pandas.md @@ -25,7 +25,7 @@ To write an Arrow source, pass any `pyarrow.Table`, `pyarrow.RecordBatch` or `pa This example loads a Pandas dataframe to a Snowflake table: -```python +```py import dlt from dlt.common import pendulum import pandas as pd @@ -45,7 +45,7 @@ pipeline.run(df, table_name="orders") A `pyarrow` table can be loaded in the same way: -```python +```py import pyarrow as pa # Create dataframe and pipeline same as above @@ -96,7 +96,7 @@ Usage is the same as without other dlt resources. Refer to the [incremental load Example: -```python +```py import dlt from dlt.common import pendulum import pandas as pd @@ -144,7 +144,7 @@ All struct types are represented as `complex` and will be loaded as JSON (if des even if they are present in the destination. If you want to represent nested data as separated tables, you must yield panda frames and arrow tables as records. In the examples above: -```python +```py # yield panda frame as records pipeline.run(df.to_dict(orient='records'), table_name="orders") diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/asana.md b/docs/website/docs/dlt-ecosystem/verified-sources/asana.md index 8554cdd376..9e3ee9c8fe 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/asana.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/asana.md @@ -56,7 +56,7 @@ To get started with your data pipeline, follow these steps: 1. 
Enter the following command: - ```bash + ```sh dlt init asana_dlt duckdb ``` @@ -94,16 +94,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python asana_dlt_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `asana`, you may also use any @@ -127,7 +127,7 @@ it is important to note the complete list of the default endpoints given in This is a `dlt.source` function, which returns a list of DltResource objects: "workspaces", "projects", "sections","tags","tasks","stories", "teams", and "users". -```python +```py @dlt.source def asana_source(access_token: str = dlt.secrets.value) -> Any: return [ @@ -142,7 +142,7 @@ def asana_source(access_token: str = dlt.secrets.value) -> Any: This is a `dlt.resource` function, which returns collections of tasks and related information. -```python +```py @dlt.resource(write_disposition="replace") def workspaces( access_token: str = dlt.secrets.value, @@ -171,7 +171,7 @@ transformer functions transform or process data from one or more resources. The transformer function `projects` process data from the `workspaces` resource. It fetches and returns a list of projects for a given workspace from Asana. -```python +```py @dlt.transformer( data_from=workspaces, write_disposition="replace", @@ -200,7 +200,7 @@ It uses `@dlt.defer` decorator to enable parallel run in thread pool. This [incremental](../../general-usage/incremental-loading.md) resource-transformer fetches all tasks for a given project from Asana. -```python +```py @dlt.transformer(data_from=projects, write_disposition="merge", primary_key="gid") def tasks( project_array: t.List[TDataItem], @@ -235,7 +235,7 @@ these steps: 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="asana_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -248,13 +248,13 @@ these steps: 1. To load the data from all the fields, you can utilise the `asana_source` method as follows: - ```python + ```py load_data = asana_source() ``` 1. Use the method `pipeline.run()` to execute the pipeline. - ```python + ```py load_info = pipeline.run(load_data) # print the information on data that was loaded print(load_info) @@ -263,7 +263,7 @@ these steps: 1. To use the method `pipeline.run()` to load custom endpoints “workspaces” and “projects”, the above script may be modified as: - ```python + ```py load_info = pipeline.run(load_data.with_resources("workspaces", "projects")) # print the information on data that was loaded print(load_info) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/chess.md b/docs/website/docs/dlt-ecosystem/verified-sources/chess.md index 7f01b83f08..2341680d97 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/chess.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/chess.md @@ -36,7 +36,7 @@ To get started with your data pipeline, follow these steps: 1. 
Enter the following command: - ```bash + ```sh dlt init chess duckdb ``` @@ -66,20 +66,20 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python chess_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -98,7 +98,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This is a `dlt.source` function for the Chess.com API named "chess", which returns a sequence of DltResource objects. That we'll discuss in subsequent sections as resources. -```python +```py dlt.source(name="chess") def source( players: List[str], start_month: str = None, end_month: str = None @@ -120,7 +120,7 @@ to fetch game data (in "YYYY/MM" format). This is a `dlt.resource` function, which returns player profiles for a list of player usernames. -```python +```py @dlt.resource(write_disposition="replace") def players_profiles(players: List[str]) -> Iterator[TDataItem]: @@ -138,7 +138,7 @@ It uses `@dlt.defer` decorator to enable parallel run in thread pool. This is a `dlt.resource` function, which returns url to game archives for specified players. -```python +```py @dlt.resource(write_disposition="replace", selected=False) def players_archives(players: List[str]) -> Iterator[List[TDataItem]]: ... @@ -154,7 +154,7 @@ runs. This incremental resource takes data from players and returns games for the last month if not specified otherwise. -```python +```py @dlt.resource(write_disposition="append") def players_games( players: List[str], start_month: str = None, end_month: str = None @@ -186,7 +186,7 @@ To create your data loading pipeline for players and load data, follow these ste 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="chess_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -199,7 +199,7 @@ To create your data loading pipeline for players and load data, follow these ste 1. To load the data from all the resources for specific players (e.g. for November), you can utilise the `source` method as follows: - ```python + ```py # Loads games for Nov 2022 data = source( ["magnuscarlsen", "vincentkeymer", "dommarajugukesh", "rpragchess"], @@ -210,7 +210,7 @@ To create your data loading pipeline for players and load data, follow these ste 1. Use the method `pipeline.run()` to execute the pipeline. - ```python + ```py info = pipeline.run(data) # print the information on data that was loaded print(info) @@ -219,7 +219,7 @@ To create your data loading pipeline for players and load data, follow these ste 1. 
To load data from specific resources like "players_games" and "player_profiles", modify the above code as: - ```python + ```py info = pipeline.run(data.with_resources("players_games", "players_profiles")) # print the information on data that was loaded print(info) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md b/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md index dea97921b4..0a0c64fb30 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md @@ -66,9 +66,9 @@ By default, Facebook access tokens have a short lifespan of one hour. To exchang Facebook access token for a long-lived token, update the `.dlt/secrets.toml` with client_id, and client_secret and execute the provided Python code. -```python +```py from facebook_ads import get_long_lived_token -print(get_long_lived_token("your short-lived token") +print(get_long_lived_token("your short-lived token")) ``` Replace the `access_token` in the `.dlt/secrets.toml` file with the long-lived token obtained from @@ -77,7 +77,7 @@ the above code snippet. To retrieve the expiry date and the associated scopes of the token, you can use the following command: -```python +```py from facebook_ads import debug_access_token debug_access_token() ``` @@ -88,7 +88,7 @@ level. In `config.toml` / `secrets.toml`: ```toml [sources.facebook_ads] -access_token_expires_at=1688821881... +access_token_expires_at=1688821881 ``` > Note: The Facebook UI, which is described here, might change. @@ -101,7 +101,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init facebook_ads duckdb ``` @@ -158,16 +158,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python facebook_ads_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `facebook_ads`, you may also @@ -191,7 +191,7 @@ it is important to note the complete list of the default endpoints given in This function returns a list of resources to load campaigns, ad sets, ads, creatives, and ad leads data from Facebook Marketing API. -```python +```py @dlt.source(name="facebook_ads") def facebook_ads_source( account_id: str = dlt.config.value, @@ -200,6 +200,7 @@ def facebook_ads_source( request_timeout: float = 300.0, app_api_version: str = None, ) -> Sequence[DltResource]: + ... ``` `account_id`: Account id associated with add manager, configured in "config.toml". @@ -220,7 +221,7 @@ were issued i.e. 'v17.0'. Defaults to the _facebook_business_ library default ve The ads function fetches ad data. It retrieves ads from a specified account with specific fields and states. -```python +```py @dlt.resource(primary_key="id", write_disposition="replace") def ads( fields: Sequence[str] = DEFAULT_AD_FIELDS, @@ -254,7 +255,7 @@ The default fields are defined in This function returns a list of resources to load facebook_insights. 
-```python +```py @dlt.source(name="facebook_ads") def facebook_insights_source( account_id: str = dlt.config.value, @@ -271,6 +272,7 @@ def facebook_insights_source( request_timeout: int = 300, app_api_version: str = None, ) -> DltResource: + ... ``` `account_id`: Account id associated with ads manager, configured in _config.toml_. @@ -315,13 +317,14 @@ were issued i.e. 'v17.0'. Defaults to the facebook_business library default vers This function fetches Facebook insights data incrementally from a specified start date until the current date, in day steps. -```python +```py @dlt.resource(primary_key=INSIGHTS_PRIMARY_KEY, write_disposition="merge") def facebook_insights( date_start: dlt.sources.incremental[str] = dlt.sources.incremental( "date_start", initial_value=initial_load_start_date_str ) ) -> Iterator[TDataItems]: + ... ``` `date_start`: Parameter sets the initial value for the "date_start" parameter in @@ -337,7 +340,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="facebook_ads", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -350,7 +353,7 @@ verified source. 1. To load all the data from, campaigns, ad sets, ads, ad creatives and leads. - ```python + ```py load_data = facebook_ads_source() load_info = pipeline.run(load_data) print(load_info) @@ -359,7 +362,7 @@ verified source. 1. To merge the Facebook Ads with the state “DISAPPROVED” and with ads state “PAUSED” you can do the following: - ```python + ```py load_data = facebook_ads_source() # It is recommended to enable root key propagation on a source that is not a merge one by default. this is not required if you always use merge but below we start with replace load_data.root_key = True @@ -382,7 +385,7 @@ verified source. 1. To load data with a custom field, for example, to load only “id” from Facebook ads, you can do the following: - ```python + ```py load_data = facebook_ads_source() # Only loads add ids, works the same for campaigns, leads etc. load_data.ads.bind(fields=("id",)) @@ -395,7 +398,7 @@ verified source. demonstrates how to enrich objects by adding an enrichment transformation that includes additional fields. - ```python + ```py # You can reduce the chunk size for smaller requests load_data = facebook_ads_source(chunk_size=2) @@ -429,7 +432,7 @@ verified source. breakdowns, etc. As defined in the `facebook_insights_source`. This function generates daily reports for a specified number of past days. - ```python + ```py load_data = facebook_insights_source( initial_load_past_days=30, attribution_window_days_lag= 7, diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md index aed19838ef..bf3d23d0a3 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md @@ -81,7 +81,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init filesystem duckdb ``` @@ -150,32 +150,32 @@ For more information, read the 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. 
Install optional modules: - For AWS S3: - ```bash + ```sh pip install s3fs ``` - For Azure blob: - ```bash + ```sh pip install adlfs>=2023.9.0 ``` - GCS storage: No separate module needed. 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python filesystem_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -197,13 +197,14 @@ This source offers chunked file readers as resources, which can be optionally cu - `read_jsonl()` - `read_parquet()` -```python +```py @dlt.source(_impl_cls=ReadersSource, spec=FilesystemConfigurationResource) def readers( bucket_url: str = dlt.secrets.value, credentials: Union[FileSystemCredentials, AbstractFileSystem] = dlt.secrets.value, file_glob: Optional[str] = "*", ) -> Tuple[DltResource, ...]: + ... ``` - `bucket_url`: The url to the bucket. @@ -225,7 +226,7 @@ This resource lists files in `bucket_url` based on the `file_glob` pattern, retu [FileItem](https://github.com/dlt-hub/dlt/blob/devel/dlt/common/storages/fsspec_filesystem.py#L22) with data access methods. These can be paired with transformers for enhanced processing. -```python +```py @dlt.resource( primary_key="file_url", spec=FilesystemConfigurationResource, standalone=True ) @@ -236,6 +237,7 @@ def filesystem( files_per_page: int = DEFAULT_CHUNK_SIZE, extract_content: bool = False, ) -> Iterator[List[FileItem]]: + ... ``` - `bucket_url`: URL of the bucket. @@ -256,9 +258,9 @@ in bucket URL. To load data into a specific table (instead of the default filesystem table), see the snippet below: -```python +```py @dlt.transformer(standalone=True) -def read_csv(items, chunksize: int = 15) ->: +def read_csv(items, chunksize: int = 15): """Reads csv file with Pandas chunk by chunk.""" ... @@ -275,7 +277,7 @@ Use the [standalone filesystem](../../general-usage/resource#declare-a-standalone-resource) resource to list files in s3, GCS, and Azure buckets. This allows you to customize file readers or manage files using [fsspec](https://filesystem-spec.readthedocs.io/en/latest/index.html). -```python +```py files = filesystem(bucket_url="s3://my_bucket/data", file_glob="csv_folder/*.csv") pipeline.run(files) ``` @@ -327,7 +329,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="standard_filesystem", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -337,17 +339,17 @@ verified source. 1. To read and load CSV files: - ```python + ```py BUCKET_URL = "YOUR_BUCKET_PATH_HERE" # path of the bucket url or local destination met_files = readers( bucket_url=BUCKET_URL, file_glob="directory/*.csv" - ).read_csv() - # tell dlt to merge on date - met_files.apply_hints(write_disposition="merge", merge_key="date") - # We load the data into the met_csv table - load_info = pipeline.run(met_files.with_name("table_name")) - print(load_info) - print(pipeline.last_trace.last_normalize_info) + ).read_csv() + # tell dlt to merge on date + met_files.apply_hints(write_disposition="merge", merge_key="date") + # We load the data into the met_csv table + load_info = pipeline.run(met_files.with_name("table_name")) + print(load_info) + print(pipeline.last_trace.last_normalize_info) ``` - The `file_glob` parameter targets all CSVs in the "met_csv/A801" directory. 
@@ -358,7 +360,7 @@ verified source. ::: 1. To load only new CSV files with [incremental loading](../../general-usage/incremental-loading): - ```python + ```py # This configuration will only consider new csv files new_files = filesystem(bucket_url=BUCKET_URL, file_glob="directory/*.csv") # add incremental on modification time @@ -369,7 +371,7 @@ verified source. ``` 1. To read and load Parquet and JSONL from a bucket: - ```python + ```py jsonl_reader = readers(BUCKET_URL, file_glob="**/*.jsonl").read_jsonl( chunksize=10000 ) @@ -391,7 +393,7 @@ verified source. 1. To set up a pipeline that reads from an Excel file using a standalone transformer: - ```python + ```py # Define a standalone transformer to read data from an Excel file. @dlt.transformer(standalone=True) def read_excel( @@ -427,7 +429,7 @@ verified source. 1. To copy files locally, add a step in the filesystem resource and then load the listing to the database: - ```python + ```py def _copy(item: FileItemDict) -> FileItemDict: # instantiate fsspec and copy file dest_file = os.path.join(local_folder, item["file_name"]) @@ -459,7 +461,7 @@ verified source. You can get a fsspec client from filesystem resource after it was extracted i.e. in order to delete processed files etc. The filesystem module contains a convenient method `fsspec_from_resource` that can be used as follows: - ```python + ```py from filesystem import filesystem, fsspec_from_resource # get filesystem source gs_resource = filesystem("gs://ci-test-bucket/") diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/github.md b/docs/website/docs/dlt-ecosystem/verified-sources/github.md index 2fd0277500..4c9a322760 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/github.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/github.md @@ -67,7 +67,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init github duckdb ``` @@ -110,16 +110,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python github_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `github_reactions`, you may @@ -137,7 +137,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This `dlt.source` function uses GraphQL to fetch DltResource objects: issues and pull requests along with associated reactions, comments, and reactions to comments. -```python +```py @dlt.source def github_reactions( owner: str, @@ -147,6 +147,7 @@ def github_reactions( max_items: int = None, max_item_age_seconds: float = None, ) -> Sequence[DltResource]: + ... ``` `owner`: Refers to the owner of the repository. @@ -169,7 +170,7 @@ yet to be implemented. Defaults to None. The `dlt.resource` function employs the `_get_reactions_data` method to retrieve data about issues, their associated comments, and subsequent reactions. -```python +```py dlt.resource( _get_reactions_data( "issues", @@ -193,11 +194,12 @@ on event type. It loads new events only and appends them to tables. 
> Note: Github allows retrieving up to 300 events for public repositories, so frequent updates are > recommended for active repos. -```python +```py @dlt.source(max_table_nesting=2) def github_repo_events( owner: str, name: str, access_token: str = None ) -> DltResource: + ... ``` `owner`: Refers to the owner of the repository. @@ -216,13 +218,14 @@ Read more about [nesting levels](../../general-usage/source#reduce-the-nesting-l This `dlt.resource` function serves as the resource for the `github_repo_events` source. It yields repository events as data items. -```python +```py dlt.resource(primary_key="id", table_name=lambda i: i["type"]) # type: ignore def repo_events( last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental( "created_at", initial_value="1970-01-01T00:00:00Z", last_value_func=max ) ) -> Iterator[TDataItems]: + ... ``` `primary_key`: Serves as the primary key, instrumental in preventing data duplication. @@ -244,7 +247,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="github_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -258,7 +261,7 @@ verified source. 1. To load all the data from repo on issues, pull requests, their comments and reactions, you can do the following: - ```python + ```py load_data = github_reactions("duckdb", "duckdb") load_info = pipeline.run(load_data) print(load_info) @@ -267,7 +270,7 @@ verified source. 1. To load only the first 100 issues, you can do the following: - ```python + ```py load_data = github_reactions("duckdb", "duckdb", max_items=100) load_info = pipeline.run(load_data.with_resources("issues")) print(load_info) @@ -276,7 +279,7 @@ verified source. 1. You can use fetch and process repo events data incrementally. It loads all data during the first run and incrementally in subsequent runs. - ```python + ```py load_data = github_repo_events( "duckdb", "duckdb", access_token=os.getenv(ACCESS_TOKEN) ) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md b/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md index 02d7803a9b..2d8be0b15d 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md @@ -12,7 +12,7 @@ or application. This Google Analytics `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/google_analytics_pipeline.py) -loads data using “Google Analytics API” to the destination of your choice. +loads data using the "Google Analytics API" to the destination of your choice. Sources and resources that can be loaded using this verified source are: @@ -29,7 +29,7 @@ Sources and resources that can be loaded using this verified source are: There are two methods to get authenticated for using this verified source: - OAuth credentials -- Service account credential +- Service account credentials Let's go over how to set up both OAuth tokens and service account credentials. In general, OAuth tokens are preferred when user consent is required, while service account credentials are better @@ -39,14 +39,14 @@ requirement. ### Grab Google service account credentials You need to create a GCP service account to get API credentials if you don't have one. To create - one, follow these steps: +one, follow these steps: 1. 
Sign in to [console.cloud.google.com](http://console.cloud.google.com/). 1. [Create a service account](https://cloud.google.com/iam/docs/service-accounts-create#creating) if needed. -1. Enable "Google Analytics API", refer +1. Enable the "Google Analytics API". Refer to the [Google documentation](https://support.google.com/googleapi/answer/6158841?hl=en) for comprehensive instructions on this process. @@ -58,7 +58,7 @@ You need to create a GCP service account to get API credentials if you don't hav 1. Create a new JSON key by selecting "Manage Keys" > "ADD KEY" > "CREATE". 1. You can download the ".json" file containing the necessary credentials for future use. -### Grab google OAuth credentials +### Grab Google OAuth credentials You need to create a GCP account to get OAuth credentials if you don't have one. To create one, follow these steps: @@ -69,31 +69,31 @@ follow these steps: 1. Enable the Analytics API in the project. -1. Search credentials in the search bar and go to Credentials. +1. Search for credentials in the search bar and go to Credentials. 1. Go to Credentials -> OAuth client ID -> Select Desktop App from the Application type and give an appropriate name. -1. Download the credentials and fill "client_id", "client_secret" and "project_id" in +1. Download the credentials and fill in "client_id", "client_secret", and "project_id" in "secrets.toml". 1. Go back to credentials and select the OAuth consent screen on the left. -1. Fill in the App name, user support email(your email), authorized domain (localhost.com), and dev +1. Fill in the App name, user support email (your email), authorized domain (localhost.com), and dev contact info (your email again). 1. Add the following scope: - ``` + ```text "https://www.googleapis.com/auth/analytics.readonly" ``` 1. Add your email as a test user. -After configuring "client_id", "client_secret" and "project_id" in "secrets.toml". To generate the +After configuring "client_id", "client_secret", and "project_id" in "secrets.toml", to generate the refresh token, run the following script from the root folder: -```bash +```sh python google_analytics/setup_script_gcp_oauth.py ``` @@ -128,7 +128,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init google_analytics duckdb ``` @@ -214,16 +214,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python google_analytics_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is @@ -239,9 +239,9 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug ### Source `simple_load` This function returns a list of resources including metadata, metrics, and dimensions data from -Google Analytics API. +the Google Analytics API. -```python +```py @dlt.source(max_table_nesting=2) def google_analytics( credentials: Union[ GcpOAuthCredentials, GcpServiceAccountCredential ] = dlt.secrets.value, @@ -250,6 +250,7 @@ def google_analytics( start_date: Optional[str] = START_DATE, rows_per_page: int = 1000, ) -> List[DltResource]: + ... 
``` `credentials`: GCP OAuth or service account credentials. @@ -269,9 +270,10 @@ set to 1000. This function retrieves all the metrics and dimensions for a report from a Google Analytics project. -```python +```py @dlt.resource(selected=False) def get_metadata(client: Resource, property_id: int) -> Iterator[Metadata]: + ... ``` `client`: This is the Google Analytics client used to make requests. @@ -284,7 +286,7 @@ def get_metadata(client: Resource, property_id: int) -> Iterator[Metadata]: This transformer function extracts data using metadata and populates a table called "metrics" with the data from each metric. -```python +```py @dlt.transformer(data_from=get_metadata, write_disposition="replace", name="metrics") def metrics_table(metadata: Metadata) -> Iterator[TDataItem]: for metric in metadata.metrics: @@ -293,7 +295,7 @@ def metrics_table(metadata: Metadata) -> Iterator[TDataItem]: `metadata`: GA4 metadata is stored in this "Metadata" class object. -Similarly, there is a transformer function called `dimensions_table` that populates table called +Similarly, there is a transformer function called `dimensions_table` that populates a table called "dimensions" with the data from each dimension. ## Customization @@ -304,7 +306,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="google_analytics", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -317,7 +319,7 @@ verified source. 1. To load all the data from metrics and dimensions: - ```python + ```py load_data = google_analytics() load_info = pipeline.run(load_data) print(load_info) @@ -328,9 +330,9 @@ verified source. 1. To load data from a specific start date: - ```python + ```py load_data = google_analytics(start_date='2023-01-01') - load_info = pipeline.run(load_data). + load_info = pipeline.run(load_data) print(load_info) ``` @@ -349,4 +351,4 @@ verified source. - [Load data from Google Analytics to Databricks in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-databricks) - [Load data from Google Analytics to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-postgres) - [Load data from Google Analytics to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-athena) - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md index 2a5d4b03ab..be12f5aea4 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md @@ -87,7 +87,7 @@ follow these steps: 1. Add the following scope: - ``` + ```text "https://www.googleapis.com/auth/spreadsheets.readonly" ``` @@ -98,7 +98,7 @@ follow these steps: After configuring "client_id", "client_secret" and "project_id" in "secrets.toml". 
To generate the refresh token, run the following script from the root folder: - ```bash + ```sh python google_sheets/setup_script_gcp_oauth.py ``` @@ -128,13 +128,13 @@ following: When setting up the pipeline, you can use either the browser-copied URL of your spreadsheet: -```bash +```sh https://docs.google.com/spreadsheets/d/1VTtCiYgxjAwcIw7UM1_BSaxC3rzIpr0HwXZwd2OlPD4/edit?usp=sharing ``` or spreadsheet id (which is a part of the url) -```bash +```sh 1VTtCiYgxjAwcIw7UM1_BSaxC3rzIpr0HwXZwd2OlPD4 ``` @@ -183,7 +183,7 @@ converted into tables, named after them and stored in the destination. 1. In range_names, you can enter as follows: - ``` + ```text range_names = ["Range_1","Range_2","Sheet1!A1:D10"] ``` @@ -214,7 +214,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init google_sheets duckdb ``` @@ -296,20 +296,20 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python google_sheets_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -328,7 +328,7 @@ Also, since recently `dlt`'s no longer recognizing date and time types, so you h Use the `apply_hints` method on the resource to achieve this. Here's how you can do it: -```python +```py for resource in resources: resource.apply_hints(columns={ "total_amount": {"data_type": "double"}, @@ -340,7 +340,7 @@ This will ensure that all values in the `total_amount` column are treated as `do And `date` column will be represented as dates, not integers. For a single resource (e.g. `Sheet1`), you can simply use: -```python +```py source.Sheet1.apply_hints(columns={ "total_amount": {"data_type": "double"}, "date": {"data_type": "timestamp"}, @@ -348,7 +348,7 @@ source.Sheet1.apply_hints(columns={ ``` To get the name of resources, you can use: -```python +```py print(source.resources.keys()) ``` @@ -371,7 +371,7 @@ or set `full_refresh=True`. This function loads data from a Google Spreadsheet. It retrieves data from all specified ranges, whether explicitly defined or named, and obtains metadata for the first two rows within each range. -```python +```py def google_spreadsheet( spreadsheet_url_or_id: str = dlt.config.value, range_names: Sequence[str] = dlt.config.value, @@ -381,6 +381,7 @@ def google_spreadsheet( get_sheets: bool = False, get_named_ranges: bool = True, ) -> Iterable[DltResource]: + ... ``` `spreadsheet_url_or_id`: ID or URL of the Google Spreadsheet. @@ -399,7 +400,7 @@ def google_spreadsheet( This function processes each range name provided by the source function, loading its data into separate tables in the destination. -```python +```py dlt.resource( process_range(rows_data, headers=headers, data_types=data_types), name=name, @@ -429,7 +430,7 @@ This table refreshes after each load, storing information on loaded ranges: - Range name as given to the source. - String and parsed representation of the loaded range. -```python +```py dlt.resource( metadata_table, write_disposition="merge", @@ -457,7 +458,7 @@ verified source. 1. 
Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="google_sheets", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -467,7 +468,7 @@ verified source. 1. To load data from explicit range names: - ```python + ```py load_data = google_spreadsheet( "https://docs.google.com/spreadsheets/d/1HhWHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580/edit#gid=0", #Spreadsheet URL range_names=["range_name1", "range_name2"], # Range names @@ -483,7 +484,7 @@ verified source. 1. To load all the range_names from spreadsheet: - ```python + ```py load_data = google_spreadsheet( "https://docs.google.com/spreadsheets/d/1HhWHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580/edit#gid=0", #Spreadsheet URL get_sheets=False, @@ -497,7 +498,7 @@ verified source. 1. To load all the sheets from spreadsheet: - ```python + ```py load_data = google_spreadsheet( "https://docs.google.com/spreadsheets/d/1HhWHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580/edit#gid=0", #Spreadsheet URL get_sheets=True, @@ -511,7 +512,7 @@ verified source. 1. To load all the sheets and range_names: - ```python + ```py load_data = google_spreadsheet( "https://docs.google.com/spreadsheets/d/1HhWHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580/edit#gid=0", #Spreadsheet URL get_sheets=True, @@ -525,7 +526,7 @@ verified source. 1. To load data from multiple spreadsheets: - ```python + ```py load_data1 = google_spreadsheet( "https://docs.google.com/spreadsheets/d/43lkHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580/edit#gid=0", #Spreadsheet URL range_names=["Sheet 1!A1:B10"], @@ -543,7 +544,7 @@ verified source. 1. To load with table rename: - ```python + ```py load_data = google_spreadsheet( "https://docs.google.com/spreadsheets/d/43lkHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580/edit#gid=0", #Spreadsheet URL range_names=["Sheet 1!A1:B10"], @@ -554,7 +555,6 @@ verified source. load_info = pipeline.run(load_data) print(load_info) - } ``` ### Using Airflow with Google Spreadsheets: @@ -583,7 +583,7 @@ Below is the correct way to set up an Airflow DAG for this purpose: - When adding the Google Spreadsheet task to the pipeline, avoid decomposing it; run it as a single task for efficiency. -```python +```py @dag( schedule_interval='@daily', start_date=pendulum.datetime(2023, 2, 1), diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md b/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md index 3a623c7b49..8a6e1d1bb3 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md @@ -55,7 +55,7 @@ Follow these steps: - Read scopes for CMS, CRM, and Settings. - Permissions for: - ``` + ```text business-intelligence, actions, crm.export, e-commerce, oauth, tickets ``` @@ -74,7 +74,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init hubspot duckdb ``` @@ -115,16 +115,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python hubspot_pipeline.py ``` 1. 
Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `hubspot_pipeline`, you may @@ -148,12 +148,13 @@ it is important to note the complete list of the default endpoints given in This function returns a list of resources to load companies, contacts, deals, tickets, products, and web analytics events data into the destination. -```python +```py @dlt.source(name="hubspot") def hubspot( api_key: str = dlt.secrets.value, include_history: bool = False, ) -> Sequence[DltResource]: + ... ``` `api_key`: The key used to authenticate with the HubSpot API. Configured in "secrets.toml". @@ -166,7 +167,7 @@ specified entities. This resource function fetches data from the "companies" endpoint and loads it to the destination, replacing any existing data. -```python +```py @dlt.resource(name="companies", write_disposition="replace") def companies( api_key: str = api_key, @@ -195,7 +196,7 @@ in addition to the custom properties. Similar to this, resource functions "conta This function loads web analytics events for specific objects from Hubspot API into the destination. -```python +```py @dlt.resource def hubspot_events_for_objects( object_type: THubspotObjectType, @@ -203,6 +204,7 @@ def hubspot_events_for_objects( api_key: str = dlt.secrets.value, start_date: pendulum.DateTime = STARTDATE, ) -> DltResource: + ... ``` `object_type`: One of the Hubspot object types as defined in @@ -225,7 +227,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="hubspot", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -238,7 +240,7 @@ verified source. 1. To load all the data from contacts, companies, deals, products, tickets, and quotes into the destination. - ```python + ```py load_data = hubspot() load_info = pipeline.run(load_data) print(load_info) @@ -246,7 +248,7 @@ verified source. 1. To load data from contacts and companies, with time history using "with_resources" method. - ```python + ```py load_data = hubspot(include_history=True).with_resources("companies","contacts") load_info = pipeline.run(load_data) print(load_info) @@ -256,7 +258,7 @@ verified source. 1. By default, all the custom properties of a CRM object are extracted. If you want only particular fields, set the flag `include_custom_props=False` and add a list of properties with the `props` arg. - ```python + ```py load_data = hubspot() load_data.contacts.bind(props=["date_of_birth", "degree"], include_custom_props=False) load_info = pipeline.run(load_data.with_resources("contacts")) @@ -264,7 +266,7 @@ verified source. 1. If you want to read all the custom properties of CRM objects and some additional (e.g. Hubspot driven) properties. - ```python + ```py load_data = hubspot() load_data.contacts.bind(props=["hs_content_membership_email", "hs_content_membership_email_confirmed"]) load_info = pipeline.run(load_data.with_resources("contacts")) @@ -273,7 +275,7 @@ verified source. 1. To load the web analytics events of a given object type. 
- ```python + ```py resource = hubspot_events_for_objects("company", ["7086461639", "7086464459"]) # Here, object type : company, and object ids : 7086461639 and 7086464459 load_info = pipeline.run([resource]) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md b/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md index 2aa1d1130f..668d1ec470 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md @@ -9,14 +9,14 @@ keywords: [inbox, inbox verified source, inbox mail, email] :::info Need help deploying these sources, or figuring out how to run them in your data stack? [Join our Slack community](https://dlthub.com/community) -or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. +or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer, Adrian. ::: This source collects inbox emails, retrieves attachments, and stores relevant email data. It uses the imaplib library for IMAP interactions and the dlt library for data processing. This Inbox `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/inbox_pipeline.py) -loads data using “Inbox” verified source to the destination of your choice. +load data using the “Inbox” verified source to the destination of your choice. Sources and resources that can be loaded using this verified source are: @@ -36,14 +36,14 @@ Sources and resources that can be loaded using this verified source are: - "email_account": Associated email account name (e.g. dlthub@dlthub.com). - "password": APP password (for third-party clients) from the email provider. -1. Host addresses and APP password procedures vary by provider and can be found via a quick Google search. For Google Mail's app password, read [here](https://support.google.com/mail/answer/185833?hl=en#:~:text=An%20app%20password%20is%20a,2%2DStep%20Verification%20turned%20on). +2. Host addresses and APP password procedures vary by provider and can be found via a quick Google search. For Google Mail's app password, read [here](https://support.google.com/mail/answer/185833?hl=en#:~:text=An%20app%20password%20is%20a,2%2DStep%20Verification%20turned%20on). -1. However, this guide covers Gmail inbox configuration; similar steps apply to other providers. +3. However, this guide covers Gmail inbox configuration; similar steps apply to other providers. ### Accessing Gmail Inbox 1. SMTP server DNS: 'imap.gmail.com' for Gmail. -1. Port: 993 (for internet messaging access protocol over TLS/SSL). +2. Port: 993 (for internet messaging access protocol over TLS/SSL). ### Grab App password for Gmail @@ -52,12 +52,12 @@ Sources and resources that can be loaded using this verified source are: #### Steps to Create and Use App Passwords: 1. Visit your Google Account > Security. -1. Under "How you sign in to Google", enable 2-Step Verification. -1. Choose App passwords at the bottom. -1. Name the device for reference. -1. Click Generate. -1. Input the generated 16-character app password as prompted. -1. Click Done. +2. Under "How you sign in to Google", enable 2-Step Verification. +3. Choose App passwords at the bottom. +4. Name the device for reference. +5. Click Generate. +6. Input the generated 16-character app password as prompted. +7. Click Done. 
Read more in [this article](https://pythoncircle.com/post/727/accessing-gmail-inbox-using-python-imaplib-module/) or [Google official documentation.](https://support.google.com/mail/answer/185833#zippy=%2Cwhy-you-may-need-an-app-password) @@ -67,7 +67,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init inbox duckdb ``` @@ -76,10 +76,10 @@ To get started with your data pipeline, follow these steps: with Inbox as the [source](../../general-usage/source) and [duckdb](../destinations/duckdb.md) as the [destination](../destinations). -1. If you'd like to use a different destination, simply replace `duckdb` with the name of your +2. If you'd like to use a different destination, simply replace `duckdb` with the name of your preferred [destination](../destinations). -1. After running this command, a new directory will be created with the necessary files and +3. After running this command, a new directory will be created with the necessary files and configuration settings to get started. For more information, read the @@ -100,11 +100,11 @@ For more information, read the password = "Please set me up!" # # APP Password for the above email account. ``` -1. Replace the host, email and password value with the [previously copied one](#grab-credentials) +2. Replace the host, email, and password value with the [previously copied one](#grab-credentials) to ensure secure access to your Inbox resources. > When adding the App Password, remove any spaces. For instance, "abcd efgh ijkl mnop" should be "abcdefghijklmnop". -1. Next, follow the [destination documentation](../../dlt-ecosystem/destinations) instructions to +3. Next, follow the [destination documentation](../../dlt-ecosystem/destinations) instructions to add credentials for your chosen destination, ensuring proper routing of your data to the final destination. @@ -112,7 +112,7 @@ For more information, read the 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` @@ -126,9 +126,9 @@ For more information, read the For pdf parsing: - PyPDF2: `pip install PyPDF2` -1. Once the pipeline has finished running, you can verify that everything loaded correctly by using +2. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `standard_inbox`, you may also @@ -145,7 +145,7 @@ For more information, read the [Walkthrough: Run a pipeline.](../../walkthroughs This function fetches inbox emails, saves attachments locally, and returns uids, messages, and attachments as resources. -```python +```py @dlt.source def inbox_source( host: str = dlt.secrets.value, @@ -158,6 +158,7 @@ def inbox_source( filter_by_mime_type: Sequence[str] = None, chunksize: int = DEFAULT_CHUNK_SIZE, ) -> Sequence[DltResource]: + ... ``` `host` : IMAP server hostname. Default: 'dlt.secrets.value'. @@ -182,13 +183,14 @@ def inbox_source( This resource collects email message UIDs (Unique IDs) from the mailbox. -```python +```py @dlt.resource(name="uids") def get_messages_uids( initial_message_num: Optional[ dlt.sources.incremental[int] ] = dlt.sources.incremental("message_uid", initial_value=1), ) -> TDataItem: + ... ``` `initial_message_num`: provides incremental loading on UID. 
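A short sketch of how this incremental cursor plays out in practice — the import path and the sender filter mirror `inbox_pipeline.py` and are assumptions, not taken from this diff:

```py
import dlt
from inbox import inbox_source  # import as used in inbox_pipeline.py

pipeline = dlt.pipeline(
    pipeline_name="standard_inbox", destination="duckdb", dataset_name="mailbox"
)

# the "messages" transformer is fed by the "uids" resource above; the
# incremental cursor on "message_uid" is kept in the pipeline state
messages = inbox_source(filter_emails=("mycreditcard@bank.com",)).messages
print(pipeline.run(messages))

# re-running this script later fetches only UIDs greater than the last one
# seen, i.e. only newly arrived emails are loaded
```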
@@ -197,12 +199,13 @@ def get_messages_uids( This resource retrieves emails by UID (Unique IDs), yielding a dictionary with metadata like UID, ID, sender, subject, dates, content type, and body. -```python +```py @dlt.transformer(name="messages", primary_key="message_uid") def get_messages( items: TDataItems, include_body: bool = True, ) -> TDataItem: + ... ``` `items`: An iterable containing dictionaries with 'message_uid' representing the email message UIDs. @@ -214,7 +217,7 @@ def get_messages( Similar to the previous resources, resource `get_attachments` extracts email attachments by UID from the IMAP server. It yields file items with attachments in the file_content field and the original email in the message field. -```python +```py @dlt.transformer( name="attachments", primary_key="file_hash", @@ -222,6 +225,7 @@ It yields file items with attachments in the file_content field and the original def get_attachments( items: TDataItems, ) -> Iterable[List[FileItem]]: + ... ``` `items`: An iterable containing dictionaries with 'message_uid' representing the email message UIDs. @@ -236,7 +240,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="standard_inbox", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -246,11 +250,11 @@ verified source. To read more about pipeline configuration, please refer to our [documentation](../../general-usage/pipeline). -1. To load messages from "mycreditcard@bank.com" starting "2023-10-1": +2. To load messages from "mycreditcard@bank.com" starting "2023-10-1": - Set `DEFAULT_START_DATE = pendulum.datetime(2023, 10, 1)` in `./inbox/settings.py`. - Use the following code: - ```python + ```py # Retrieve messages from the specified email address. messages = inbox_source(filter_emails=("mycreditcard@bank.com",)).messages # Configure messages to exclude body and name the result "my_inbox". @@ -261,18 +265,18 @@ verified source. print(load_info) ``` > Please refer to inbox_source() docstring for email filtering options by sender, date, or mime type. -1. To load messages from multiple emails, including "community@dlthub.com": +3. To load messages from multiple emails, including "community@dlthub.com": - ```python + ```py messages = inbox_source( filter_emails=("mycreditcard@bank.com", "community@dlthub.com.") ).messages ``` -1. In `inbox_pipeline.py`, the `pdf_to_text` transformer extracts text from PDFs, treating each page as a separate data item. +4. In `inbox_pipeline.py`, the `pdf_to_text` transformer extracts text from PDFs, treating each page as a separate data item. Using the `pdf_to_text` function to load parsed pdfs from mail to the database: - ```python + ```py filter_emails = ["mycreditcard@bank.com", "community@dlthub.com."] # Email senders attachments = inbox_source( filter_emails=filter_emails, filter_by_mime_type=["application/pdf"] @@ -285,4 +289,4 @@ verified source. ``` - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/jira.md b/docs/website/docs/dlt-ecosystem/verified-sources/jira.md index 4588f4f4c6..068251a927 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/jira.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/jira.md @@ -3,7 +3,7 @@ :::info Need help deploying these sources, or figuring out how to run them in your data stack? 
[Join our Slack community](https://dlthub.com/community) -or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. +or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer, Adrian. ::: [Jira](https://www.atlassian.com/software/jira) by Atlassian helps teams manage projects and tasks @@ -11,16 +11,16 @@ efficiently, prioritize work, and collaborate. This Jira `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/jira_pipeline.py) -loads data using Jira API to the destination of your choice. +loads data using the Jira API to the destination of your choice. The endpoints that this verified source supports are: | Name | Description | | --------- | ---------------------------------------------------------------------------------------- | -| issues | individual pieces of work to be completed | -| users | administrator of a given project | -| workflows | the key aspect of managing and tracking the progress of issues or tasks within a project | -| projects | a collection of tasks that need to be completed to achieve a certain outcome | +| issues | Individual pieces of work to be completed | +| users | Administrators of a given project | +| workflows | The key aspect of managing and tracking the progress of issues or tasks within a project | +| projects | A collection of tasks that need to be completed to achieve a certain outcome | To get a complete list of sub-endpoints that can be loaded, see [jira/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/jira/settings.py) @@ -51,7 +51,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init jira duckdb ``` @@ -96,25 +96,25 @@ For more information, read the guide on [how to add a verified source](../../wal add credentials for your chosen destination, ensuring proper routing of your data to the final destination. -For more information, read the [General Usage: Credentials.](../../general-usage/credentials) +For more information, read [General Usage: Credentials.](../../general-usage/credentials) ## Run the pipeline 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python jira_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` - For example, the `pipeline_name` for the above pipeline example is `jira_pipeline`, you may also + For example, the `pipeline_name` for the above pipeline example is `jira_pipeline`. You may also use any custom name instead. For more information, read the guide on [how to run a pipeline](../../walkthroughs/run-a-pipeline). @@ -134,41 +134,43 @@ it is important to note the complete list of the default endpoints given in This source function creates a list of resources to load data into the destination. -```python +```py @dlt.source def jira( subdomain: str = dlt.secrets.value, email: str = dlt.secrets.value, api_token: str = dlt.secrets.value, ) -> Iterable[DltResource]: + ... ``` - `subdomain`: The subdomain of the Jira account. Configured in ".dlt/secrets.toml". - `email`: The email associated with the Jira account. Configured in ".dlt/secrets.toml". 
-- `api_token`: The API token for accessing the Jira account.Configured in ".dlt/secrets.toml". +- `api_token`: The API token for accessing the Jira account. Configured in ".dlt/secrets.toml". ### Source `jira_search` This function returns a resource for querying issues using JQL [(Jira Query Language)](https://support.atlassian.com/jira-service-management-cloud/docs/use-advanced-search-with-jira-query-language-jql/). -```python +```py @dlt.source def jira_search( subdomain: str = dlt.secrets.value, email: str = dlt.secrets.value, api_token: str = dlt.secrets.value, ) -> Iterable[DltResource]: + ... ``` -The above function uses the same arguments `subdomain`, `email` and `api_token` as described above -for [jira source](jira.md#source-jira). +The above function uses the same arguments `subdomain`, `email`, and `api_token` as described above +for the [jira source](jira.md#source-jira). ### Resource `issues` The resource function searches issues using JQL queries and then loads them to the destination. -```python +```py @dlt.resource(write_disposition="replace") def issues(jql_queries: List[str]) -> Iterable[TDataItem]: api_path = "rest/api/3/search" @@ -179,14 +181,14 @@ def issues(jql_queries: List[str]) -> Iterable[TDataItem]: ## Customization ### Create your own pipeline -If you wish to create your own pipelines you can leverage source and resource methods as discussed +If you wish to create your own pipelines, you can leverage source and resource methods as discussed above. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset. To read more about pipeline configuration, please refer to our documentation [here](https://dlthub.com/docs/general-usage/pipeline): - ```python + ```py pipeline = dlt.pipeline( pipeline_name="jira_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -196,20 +198,20 @@ above. 2. To load custom endpoints such as “issues” and “users” using the jira source function: - ```python + ```py #Run the pipeline load_info = pipeline.run(jira().with_resources("issues","users")) print(f"Load Information: {load_info}") ``` -3. To load the custom issues using JQL queries, you can use custom queries, here is an example +3. To load the custom issues using JQL queries, you can use custom queries. Here is an example below: - ```python + ```py # Define the JQL queries as follows queries = [ "created >= -30d order by created DESC", - "created >= -30d AND project = DEV AND issuetype = Epic AND status = "In Progress" order by created DESC", + 'created >= -30d AND project = DEV AND issuetype = Epic AND status = "In Progress" order by created DESC', ] # Run the pipeline load_info = pipeline.run(jira_search().issues(jql_queries=queries)) @@ -218,4 +220,4 @@ above. ``` - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md b/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md index 694a81ba1f..0cedad6645 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md @@ -9,13 +9,13 @@ keywords: [kafka api, kafka verified source, kafka] :::info Need help deploying these sources, or figuring out how to run them in your data stack? [Join our Slack community](https://join.slack.com/t/dlthub-community/shared_invite/zt-1n5193dbq-rCBmJ6p~ckpSFK4hCF2dYA) -or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. 
+or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer, Adrian. ::: [Kafka](https://www.confluent.io/) is an open-source distributed event streaming platform, organized in the form of a log with message publishers and subscribers. -The Kafka `dlt` verified source loads data using Confluent Kafka API to the destination of your choice, -see a [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/kafka_pipeline.py). +The Kafka `dlt` verified source loads data using the Confluent Kafka API to the destination of your choice. +See a [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/kafka_pipeline.py). The resource that can be loaded: @@ -29,7 +29,7 @@ The resource that can be loaded: 1. Follow the [Kafka Setup](https://developer.confluent.io/get-started/python/#kafka-setup) to tweak a project. -1. Follow the [Configuration](https://developer.confluent.io/get-started/python/#configuration) to +2. Follow the [Configuration](https://developer.confluent.io/get-started/python/#configuration) to get the project credentials. ### Initialize the verified source @@ -38,7 +38,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init kafka duckdb ``` @@ -47,10 +47,10 @@ To get started with your data pipeline, follow these steps: with Kafka as the [source](../../general-usage/source) and [duckdb](../destinations/duckdb.md) as the [destination](../destinations). -1. If you'd like to use a different destination, simply replace `duckdb` with the name of your +2. If you'd like to use a different destination, simply replace `duckdb` with the name of your preferred [destination](../destinations). -1. After running this command, a new directory will be created with the necessary files and +3. After running this command, a new directory will be created with the necessary files and configuration settings to get started. For more information, read the @@ -80,25 +80,28 @@ sasl_password="example_secret" 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` -1. You're now ready to run the pipeline! To get started, run the following command: +2. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python kafka_pipeline.py ``` -1. Once the pipeline has finished running, you can verify that everything loaded correctly by using +3. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For more information, read the [Walkthrough: Run a pipeline](../../walkthroughs/run-a-pipeline). +:::info If you create a topic and start reading from it immediately, the brokers may not yet be synchronized, and the offset from which `dlt` reads messages may be invalid. In this case, the resource will return no messages. Pending messages will be received on the next run (or once the brokers synchronize). +::: + ## Sources and resources `dlt` works on the principle of [sources](../../general-usage/source) and @@ -108,7 +110,7 @@ For more information, read the [Walkthrough: Run a pipeline](../../walkthroughs/ This function retrieves messages from the given Kafka topics.
-```python +```py @dlt.resource(name="kafka_messages", table_name=lambda msg: msg["_kafka"]["topic"], standalone=True) def kafka_consumer( topics: Union[str, List[str]], @@ -118,29 +120,30 @@ def kafka_consumer( batch_timeout: Optional[int] = 3, start_from: Optional[TAnyDateTime] = None, ) -> Iterable[TDataItem]: + ... ``` `topics`: A list of Kafka topics to be extracted. -`credentials`: By default, is initialized with the data from -the `secrets.toml`. May be used explicitly to pass an initialized +`credentials`: By default, it is initialized with the data from +the `secrets.toml`. It may be used explicitly to pass an initialized Kafka Consumer object. -`msg_processor`: A function, which'll be used to process every message +`msg_processor`: A function, which will be used to process every message read from the given topics before saving them in the destination. -Can be used explicitly to pass a custom processor. See the +It can be used explicitly to pass a custom processor. See the [default processor](https://github.com/dlt-hub/verified-sources/blob/fe8ed7abd965d9a0ca76d100551e7b64a0b95744/sources/kafka/helpers.py#L14-L50) as an example of how to implement processors. -`batch_size`: The amount of messages to extract from the cluster -at once. Can be set to tweak performance. +`batch_size`: The number of messages to extract from the cluster +at once. It can be set to tweak performance. -`batch_timeout`: The maximum timeout for a single batch reading -operation. Can be set to tweak performance. +`batch_timeout`: The maximum timeout (in seconds) for a single batch reading +operation. It can be set to tweak performance. -`start_from`: A timestamp, starting with which the messages must +`start_from`: A timestamp, starting from which the messages must be read. When passed, `dlt` asks the Kafka cluster for an offset, -actual for the given timestamp, and starts to read messages from +which is actual for the given timestamp, and starts to read messages from this offset. @@ -151,7 +154,7 @@ this offset. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="kafka", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -159,18 +162,18 @@ this offset. ) ``` -1. To extract several topics: +2. To extract several topics: - ```python + ```py topics = ["topic1", "topic2", "topic3"] source = kafka_consumer(topics) pipeline.run(source, write_disposition="replace") ``` -1. To extract messages and process them in a custom way: +3. To extract messages and process them in a custom way: - ```python + ```py def custom_msg_processor(msg: confluent_kafka.Message) -> Dict[str, Any]: return { "_kafka": { @@ -185,12 +188,12 @@ this offset. pipeline.run(data) ``` -1. To extract messages, starting from a timestamp: +4. To extract messages, starting from a timestamp: - ```python + ```py data = kafka_consumer("topic", start_from=pendulum.datetime(2023, 12, 15)) pipeline.run(data) ``` - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md b/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md index 45841850c6..8be748b1a3 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md @@ -44,7 +44,7 @@ To get started with your data pipeline, follow these steps: 1. 
Enter the following command: - ```bash + ```sh dlt init matomo duckdb ``` @@ -102,16 +102,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python matomo_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `matomo`, you may also @@ -128,7 +128,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function executes and loads a set of reports defined in "queries" for a specific Matomo site identified by "site_id". -```python +```py @dlt.source(max_table_nesting=2) def matomo_reports( api_token: str = dlt.secrets.value, @@ -136,6 +136,7 @@ def matomo_reports( queries: List[DictStrAny] = dlt.config.value, site_id: int = dlt.config.value, ) -> Iterable[DltResource]: + ... ``` `api_token`: API access token for Matomo server authentication, defaults to "./dlt/secrets.toml" @@ -152,7 +153,7 @@ def matomo_reports( The function loads visits from current day and the past `initial_load_past_days` in first run. In subsequent runs it continues from last load and skips active visits until closed. -```python +```py def matomo_visits( api_token: str = dlt.secrets.value, url: str = dlt.config.value, @@ -162,6 +163,7 @@ def matomo_visits( visit_max_duration_seconds: int = 3600, get_live_event_visitors: bool = False, ) -> List[DltResource]: + ... ``` `api_token`: API token for authentication, defaulting to "./dlt/secrets.toml". @@ -184,7 +186,7 @@ def matomo_visits( This function retrieves site visits within a specified timeframe. If a start date is given, it begins from that date. If not, it retrieves all visits up until now. -```python +```py @dlt.resource( name="visits", write_disposition="append", primary_key="idVisit", selected=True ) @@ -196,6 +198,7 @@ def get_last_visits( visit_max_duration_seconds: int = 3600, rows_per_page: int = 2000, ) -> Iterator[TDataItem]: + ... ``` `site_id`: Unique ID for each Matomo site. @@ -215,7 +218,7 @@ def get_last_visits( This function, retrieves unique visit information from get_last_visits. -```python +```py @dlt.transformer( data_from=get_last_visits, write_disposition="merge", @@ -225,6 +228,7 @@ This function, retrieves unique visit information from get_last_visits. def get_unique_visitors( visits: List[DictStrAny], client: MatomoAPIClient, site_id: int ) -> Iterator[TDataItem]: + ... ``` `visits`: Recent visit data within the specified timeframe. @@ -242,7 +246,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="matomo", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -255,7 +259,7 @@ verified source. 1. To load the data from reports. - ```python + ```py data_reports = matomo_reports() load_info = pipeline_reports.run(data_reports) print(load_info) @@ -264,7 +268,7 @@ verified source. 1. To load custom data from reports using queries. 
- ```python + ```py queries = [ { "resource_name": "custom_report_name", @@ -285,7 +289,7 @@ verified source. 1. To load data from reports and visits. - ```python + ```py data_reports = matomo_reports() data_events = matomo_visits() load_info = pipeline_reports.run([data_reports, data_events]) @@ -294,7 +298,7 @@ verified source. 1. To load data on live visits and visitors, and only retrieve data from today. - ```python + ```py load_data = matomo_visits(initial_load_past_days=1, get_live_event_visitors=True) load_info = pipeline_events.run(load_data) print(load_info) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md b/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md index 9178d2ab6d..a30eb3f248 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md @@ -66,30 +66,30 @@ Here are the typical ways to configure MongoDB and their connection URLs: 1. Connect to MongoDB: - ```bash + ```sh mongo "mongodb://dbuser:passwd@your_host:27017" ``` 1. List all Databases: - ```bash + ```sh show dbs ``` 1. View Collections in a Database: 1. Switch to Database: - ```bash + ```sh use your_database_name ``` 1. Display its Collections: - ```bash + ```sh show collections ``` 1. Disconnect: - ```bash + ```sh exit ``` @@ -115,7 +115,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init mongodb duckdb ``` @@ -174,16 +174,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python mongodb_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `local_mongo`, you may also @@ -200,7 +200,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function loads data from a MongoDB database, yielding one or multiple collections to be retrieved. -```python +```py @dlt.source def mongodb( connection_url: str = dlt.secrets.value, @@ -209,6 +209,7 @@ def mongodb( incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg] write_disposition: Optional[str] = dlt.config.value, ) -> Iterable[DltResource]: + ... ``` `connection_url`: MongoDB connection URL. @@ -226,7 +227,7 @@ def mongodb( This function fetches a single collection from a MongoDB database using PyMongo. -```python +```py def mongodb_collection( connection_url: str = dlt.secrets.value, database: Optional[str] = dlt.config.value, @@ -234,6 +235,7 @@ def mongodb_collection( incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg] write_disposition: Optional[str] = dlt.config.value, ) -> Any: + ... ``` `collection`: Name of the collection to load. @@ -247,7 +249,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="mongodb_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -257,7 +259,7 @@ verified source. 1. 
To load all the collections in a database: - ```python + ```py load_data = mongodb() load_info = pipeline.run(load_data, write_disposition="replace") print(load_info) @@ -265,7 +267,7 @@ verified source. 1. To load a specific collections from the database: - ```python + ```py load_data = mongodb().with_resources("collection_1", "collection_2") load_info = pipeline.run(load_data, write_disposition="replace") print(load_info) @@ -273,7 +275,7 @@ verified source. 1. To load specific collections from the source incrementally: - ```python + ```py load_data = mongodb(incremental=dlt.sources.incremental("date")).with_resources("collection_1") load_info = pipeline.run(load_data, write_disposition = "merge") print(load_info) @@ -282,7 +284,7 @@ verified source. 1. To load data from a particular collection say "movies" incrementally: - ```python + ```py load_data = mongodb_collection( collection="movies", incremental=dlt.sources.incremental( @@ -300,7 +302,7 @@ verified source. 1. To incrementally load a table with an append-only disposition using hints: - ```python + ```py # Suitable for tables where new rows are added, but existing rows aren't updated. # Load data from the 'listingsAndReviews' collection in MongoDB, using 'last_scraped' for incremental addition. airbnb = mongodb().with_resources("listingsAndReviews") @@ -317,7 +319,7 @@ verified source. 1. To load a selected collection and rename it in the destination: - ```python + ```py # Create the MongoDB source and select the "collection_1" collection source = mongodb().with_resources("collection_1") diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/mux.md b/docs/website/docs/dlt-ecosystem/verified-sources/mux.md index a713121f29..338611e657 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/mux.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/mux.md @@ -46,7 +46,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init mux duckdb ``` @@ -88,16 +88,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python mux_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is @@ -115,7 +115,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function yields resources "asset_resource" and "views_resource" to load video assets and views. -```python +```py @dlt.source def mux_source() -> Iterable[DltResource]: yield assets_resource @@ -126,13 +126,14 @@ def mux_source() -> Iterable[DltResource]: The assets_resource function fetches metadata about video assets from the Mux API's "assets" endpoint. -```python +```py @dlt.resource(write_disposition="merge") def assets_resource( mux_api_access_token: str = dlt.secrets.value, mux_api_secret_key: str = dlt.secrets.value, limit: int = DEFAULT_LIMIT, ) -> Iterable[TDataItem]: + ... ``` `mux_api_access_token`: Mux API token for authentication, defaults to ".dlt/secrets.toml". 
@@ -145,13 +146,14 @@ def assets_resource( This function yields data about every video view from yesterday to be loaded. -```python +```py @dlt.resource(write_disposition="append") def views_resource( mux_api_access_token: str = dlt.secrets.value, mux_api_secret_key: str = dlt.secrets.value, limit: int = DEFAULT_LIMIT, ) -> Iterable[DltResource]: + ... ``` The arguments `mux_api_access_token`, `mux_api_secret_key` and `limit` are the same as described [above](#resource-assets_resource) in "asset_resource". @@ -165,7 +167,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="mux_pipeline", # Use a custom name if desired destination="bigquery", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -175,21 +177,21 @@ verified source. 1. To load metadata about every asset to be loaded: - ```python - load_info = pipeline.run(mux_source().with_resources("assets_resource") + ```py + load_info = pipeline.run(mux_source().with_resources("assets_resource")) print(load_info) ``` 1. To load data for each video view from yesterday: - ```python - load_info = pipeline.run(mux_source().with_resources("views_resource") + ```py + load_info = pipeline.run(mux_source().with_resources("views_resource")) print(load_info) ``` 1. To load both metadata about assets and video views from yesterday: - ```python + ```py load_info = pipeline.run(mux_source()) print(load_info) ``` diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/notion.md b/docs/website/docs/dlt-ecosystem/verified-sources/notion.md index ffb0becfbb..650fc10fde 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/notion.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/notion.md @@ -50,7 +50,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init notion duckdb ``` @@ -93,16 +93,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python notion_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `notion`, you may also use any @@ -119,12 +119,13 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function loads notion databases from notion into the destination. -```python +```py @dlt.source def notion_databases( database_ids: Optional[List[Dict[str, str]]] = None, api_key: str = dlt.secrets.value, ) -> Iterator[DltResource]: + ... ``` `database_ids`: A list of dictionaries each containing a database id and a name. @@ -146,7 +147,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="notion", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -159,7 +160,7 @@ verified source. 1. 
To load all the integrated databases: - ```python + ```py load_data = notion_databases() load_info = pipeline.run(load_data) print(load_info) @@ -167,7 +168,7 @@ verified source. 1. To load the custom databases: - ```python + ```py selected_database_ids = [{"id": "0517dae9409845cba7d","use_name":"db_one"}, {"id": "d8ee2d159ac34cfc"}] load_data = notion_databases(database_ids=selected_database_ids) load_info = pipeline.run(load_data) @@ -176,7 +177,7 @@ verified source. The Database ID can be retrieved from the URL. For example if the URL is: - ```shell + ```sh https://www.notion.so/d8ee2d159ac34cfc85827ba5a0a8ae71?v=c714dec3742440cc91a8c38914f83b6b ``` diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/personio.md b/docs/website/docs/dlt-ecosystem/verified-sources/personio.md index 6fae36d0ec..af951bd21a 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/personio.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/personio.md @@ -57,7 +57,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init personio duckdb ``` @@ -102,16 +102,16 @@ For more information, read [Credentials](../../general-usage/credentials). 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python personio_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `personio`, you may also use @@ -127,7 +127,7 @@ For more information, read [Run a pipeline.](../../walkthroughs/run-a-pipeline) ### Source `personio_source` This `dlt` source returns data resources like `employees`, `absences`, `absence_types`, etc. -```python +```py @dlt.source(name="personio") def personio_source( client_id: str = dlt.secrets.value, @@ -158,8 +158,8 @@ def personio_source( This resource retrieves data on all the employees in a company. -```python - @dlt.resource(primary_key="id", write_disposition="merge") +```py +@dlt.resource(primary_key="id", write_disposition="merge") def employees( updated_at: dlt.sources.incremental[ pendulum.DateTime @@ -185,9 +185,10 @@ data incrementally from the Personio API to your preferred destination. ### Resource `absence_types` Simple resource, which retrieves a list of various types of employee absences. -```python +```py @dlt.resource(primary_key="id", write_disposition="replace") def absence_types(items_per_page: int = items_per_page) -> Iterable[TDataItem]: + ... ... ``` @@ -209,7 +210,7 @@ The transformer functions transform or process data from resources. The transformer function `employees_absences_balance` process data from the `employees` resource. It fetches and returns a list of the absence balances for each employee. -```python +```py @dlt.transformer( data_from=employees, write_disposition="merge", @@ -232,7 +233,7 @@ verified source. 1. Configure the [pipeline](../../general-usage/pipeline) by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="personio", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -242,14 +243,14 @@ verified source. 1. 
To load employee data: - ```python + ```py load_data = personio_source().with_resources("employees") print(pipeline.run(load_data)) ``` 1. To load data from all supported endpoints: - ```python + ```py load_data = personio_source() print(pipeline.run(load_data)) ``` diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md b/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md index 9d1a5a0a02..9b2c8a640f 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md @@ -9,7 +9,7 @@ keywords: [pipedrive api, pipedrive verified source, pipedrive] :::info Need help deploying these sources, or figuring out how to run them in your data stack? [Join our Slack community](https://dlthub.com/community) -or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. +or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer, Adrian. ::: [Pipedrive](https://developers.pipedrive.com/docs/api/v1) is a cloud-based sales Customer @@ -18,7 +18,7 @@ communication, and automate sales processes. This Pipedrive `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/pipedrive_pipeline.py) -loads data using “Pipedrive API” to the destination of your choice. +load data using the “Pipedrive API” to the destination of your choice. Sources and resources that can be loaded using this verified source are: @@ -53,7 +53,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init pipedrive duckdb ``` @@ -93,19 +93,19 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python pipedrive_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` - For example, the `pipeline_name` for the above pipeline example is `pipedrive`, you may also use + For example, the `pipeline_name` for the above pipeline example is `pipedrive`, but you may also use any custom name instead. For more information, read the guide on [how to run a pipeline](../../walkthroughs/run-a-pipeline). @@ -138,12 +138,13 @@ Pipedrive API. This function returns a list of resources including activities, deals, custom_fields_mapping and other resources data from Pipedrive API. -```python +```py @dlt.source(name="pipedrive") def pipedrive_source( pipedrive_api_key: str = dlt.secrets.value, since_timestamp: Optional[Union[pendulum.DateTime, str]] = dlt.config.value, ) -> Iterator[DltResource]: + ... ``` `pipedrive_api_key`: Authentication token for Pipedrive, configured in ".dlt/secrets.toml". @@ -151,7 +152,7 @@ def pipedrive_source( `since_timestamp`: Starting timestamp for incremental loading. By default, complete history is loaded on the first run. And new data in subsequent runs. -> Note: Incremental loading can be enabled or disabled depending on user prefrences. +> Note: Incremental loading can be enabled or disabled depending on user preferences. 
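As an illustration of the `since_timestamp` argument described above, here is a minimal sketch of restricting the initial load; the dataset name is an assumption for this example, and a fuller variant with selected resources appears in the customization section below:

```py
import dlt

from pipedrive import pipedrive_source  # assumes the folder created by `dlt init pipedrive duckdb`

pipeline = dlt.pipeline(
    pipeline_name="pipedrive",
    destination="duckdb",
    dataset_name="pipedrive_data",  # illustrative dataset name
)

# Only records changed on or after this timestamp are fetched on the first run;
# later runs continue incrementally from the saved state.
source = pipedrive_source(since_timestamp="2023-03-01 00:00:00Z")

load_info = pipeline.run(source)
print(load_info)
```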
### Resource `iterator RECENTS_ENTITIES` @@ -159,7 +160,7 @@ This code generates resources for each entity in [RECENTS_ENTITIES](https://github.com/dlt-hub/verified-sources/blob/master/sources/pipedrive/settings.py), stores them in endpoints_resources, and then loads data from each endpoint to the destination. -```python +```py endpoints_resources = {} for entity, resource_name in RECENTS_ENTITIES.items(): endpoints_resources[resource_name] = dlt.resource( @@ -186,7 +187,7 @@ for entity, resource_name in RECENTS_ENTITIES.items(): This function gets the participants of deals from the Pipedrive API and yields the result. -```python +```py def pipedrive_source(args): # Rest of function yield endpoints_resources["deals"] | dlt.transformer( @@ -209,12 +210,13 @@ further processing or loading. This function preserves the mapping of custom fields across different pipeline runs. It is used to create and store a mapping of custom fields for different entities in the source state. -```python +```py @dlt.resource(selected=False) def create_state(pipedrive_api_key: str) -> Iterator[Dict[str, Any]]: def _get_pages_for_rename( entity: str, fields_entity: str, pipedrive_api_key: str ) -> Dict[str, Any]: + ... ``` It processes each entity in ENTITY_MAPPINGS, updating the custom fields mapping if a related fields @@ -238,7 +240,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="pipedrive", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -251,7 +253,7 @@ verified source. 1. To print source info: - ```python + ```py pipedrive_data = pipedrive_source() #print source info print(pipedrive_data) @@ -263,15 +265,15 @@ verified source. 1. To load all the data in Pipedrive: - ```python + ```py load_data = pipedrive_source() # calls the source function - load_info = pipeline.run(load_info) #runs the pipeline with selected source configuration + load_info = pipeline.run(load_data) #runs the pipeline with selected source configuration print(load_info) ``` 1. To load data from selected resources: - ```python + ```py #To load custom fields, include custom_fields_mapping for hash to name mapping. load_data = pipedrive_source().with_resources("products", "deals", "deals_participants", "custom_fields_mapping") load_info = pipeline.run(load_data) #runs the pipeline loading selected data @@ -280,7 +282,7 @@ verified source. 1. To load data from a start date: - ```python + ```py # Configure a source for 'activities' starting from the specified date. # The 'custom_fields_mapping' is incorporated to convert custom field hashes into their respective names. activities_source = pipedrive_source( @@ -293,4 +295,4 @@ verified source. ``` - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md b/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md index aa8fbe10d4..7d6b6e036a 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md @@ -63,7 +63,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init salesforce duckdb ``` @@ -110,16 +110,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. 
Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python salesforce_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `salesforce`, you may also use @@ -137,13 +137,14 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function returns a list of resources to load users, user_role, opportunity, opportunity_line_item, account etc. data from Salesforce API. -```python +```py @dlt.source(name="salesforce") def salesforce_source( user_name: str = dlt.secrets.value, password: str = dlt.secrets.value, security_token: str = dlt.secrets.value, ) ->Iterable[DltResource]: + ... ``` - `user_name`: Your Salesforce account username. @@ -156,7 +157,7 @@ def salesforce_source( This resource function retrieves records from the Salesforce "User" endpoint. -```python +```py @dlt.resource(write_disposition="replace") def sf_user() -> Iterator[Dict[str, Any]]: yield from get_records(client, "User") @@ -176,7 +177,7 @@ the "user_role" endpoint. This resource function retrieves records from the Salesforce "Opportunity" endpoint in incremental mode. -```python +```py @dlt.resource(write_disposition="merge") def opportunity( last_timestamp: Incremental[str] = dlt.sources.incremental( @@ -215,7 +216,7 @@ To create your data pipeline using single loading and 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="salesforce_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -228,7 +229,7 @@ To create your data pipeline using single loading and 1. To load data from all the endpoints, use the `salesforce_source` method as follows: - ```python + ```py load_data = salesforce_source() source.schema.merge_hints({"not_null": ["id"]}) # Hint for id field not null load_info = pipeline.run(load_data) @@ -241,7 +242,7 @@ To create your data pipeline using single loading and 1. To use the method `pipeline.run()` to load custom endpoints “candidates” and “members”: - ```python + ```py load_info = pipeline.run(load_data.with_resources("opportunity", "contact")) # print the information on data that was loaded print(load_info) @@ -260,7 +261,7 @@ To create your data pipeline using single loading and 1. To load data from the “contact” in replace mode and “task” incrementally merge mode endpoints: - ```python + ```py load_info = pipeline.run(load_data.with_resources("contact", "task")) # pretty print the information on data that was loaded print(load_info) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md b/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md index 09dc392c87..af00b17703 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md @@ -61,7 +61,7 @@ To get started with your data pipeline, follow these steps: 1. 
Enter the following command: - ```bash + ```sh dlt init shopify_dlt duckdb ``` @@ -125,16 +125,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python shopify_dlt_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is `shopify_data`, you may also @@ -152,7 +152,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function returns a list of resources to load products, orders, and customers data from Shopify API. -```python +```py def shopify_source( private_app_password: str = dlt.secrets.value, api_version: str = DEFAULT_API_VERSION, @@ -163,6 +163,7 @@ def shopify_source( items_per_page: int = DEFAULT_ITEMS_PER_PAGE, order_status: TOrderStatus = "any", ) -> Iterable[DltResource]: + ... ``` `private_app_password`: App's password for your shop. @@ -188,7 +189,7 @@ incremental loading if unspecified. This resource loads products from your Shopify shop into the destination. It supports incremental loading and pagination. -```python +```py @dlt.resource(primary_key="id", write_disposition="merge") def products( updated_at: dlt.sources.incremental[ @@ -202,6 +203,7 @@ def products( created_at_min: pendulum.DateTime = created_at_min_obj, items_per_page: int = items_per_page, ) -> Iterable[TDataItem]: + ... ``` `updated_at`: The saved [state](../../general-usage/state) of the last 'updated_at' value. @@ -212,7 +214,7 @@ support incremental loading and pagination. ### Resource `shopify_partner_query`: This resource can be used to run custom GraphQL queries to load paginated data. -```python +```py @dlt.resource def shopify_partner_query( query: str, @@ -224,6 +226,7 @@ def shopify_partner_query( organization_id: str = dlt.config.value, api_version: str = DEFAULT_PARTNER_API_VERSION, ) -> Iterable[TDataItem]: + ... ``` `query`: The GraphQL query for execution. @@ -251,7 +254,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="shopify", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -264,7 +267,7 @@ verified source. 1. To load data from "products", "orders" and "customers" from 1st Jan 2023. - ```python + ```py # Add your desired resources to the list... resources = ["products", "orders", "customers"] start_date="2023-01-01" @@ -278,7 +281,7 @@ verified source. minimizes potential failure during large data loads. Running chunks and incremental loads in parallel accelerates the initial load. - ```python + ```py # Load all orders from 2023-01-01 to now min_start_date = current_start_date = pendulum.datetime(2023, 1, 1) max_end_date = pendulum.now() @@ -310,7 +313,7 @@ verified source. print(load_info) ``` 1. To load the first 10 transactions via GraphQL query from the Shopify Partner API. 
- ```python + ```py # Construct query to load transactions 100 per page, the `$after` variable is used to paginate query = """query Transactions($after: String) { transactions(after: $after, first: 10) { diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/slack.md b/docs/website/docs/dlt-ecosystem/verified-sources/slack.md index 85fd3f2a3a..104eeff388 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/slack.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/slack.md @@ -9,24 +9,24 @@ keywords: [slack api, slack verified source, slack] :::info Need help deploying these sources, or figuring out how to run them in your data stack? [Join our Slack community](https://dlthub.com/community) -or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer Adrian. +or [book a call](https://calendar.app.google/kiLhuMsWKpZUpfho6) with our support engineer, Adrian. ::: [Slack](https://slack.com/) is a popular messaging and collaboration platform for teams and organizations. This Slack `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/slack_pipeline.py) -loads data using “Slack API” to the destination of your choice. +load data using the “Slack API” to the destination of your choice. Sources and resources that can be loaded using this verified source are: | Name | Description | |-----------------------|------------------------------------------------------------------------------------| -| slack | Retrives all the Slack data: channels, messages for selected channels, users, logs | -| channels | Retrives all the channels data | -| users | Retrives all the users info | -| get_messages_resource | Retrives all the messages for a given channel | -| access_logs | Retrives the access logs | +| slack | Retrieves all the Slack data: channels, messages for selected channels, users, logs | +| channels | Retrieves all the channels data | +| users | Retrieves all the users info | +| get_messages_resource | Retrieves all the messages for a given channel | +| access_logs | Retrieves the access logs | ## Setup Guide @@ -67,7 +67,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init slack duckdb ``` @@ -96,7 +96,7 @@ For more information, read the guide on [how to add a verified source](../../wal access_token = "Please set me up!" # please set me up! ``` -1. Copy the user Oauth token you [copied above](#grab-user-oauth-token). +1. Copy the user OAuth token you [copied above](#grab-user-oauth-token). 1. Finally, enter credentials for your chosen destination as per the [docs](../destinations/). @@ -107,20 +107,20 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python slack_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -138,7 +138,7 @@ For more information, read the [General Usage: Credentials.](../../general-usage It retrieves data from Slack's API and fetches the Slack data such as channels, messages for selected channels, users, logs. 
-```python +```py @dlt.source(name="slack", max_table_nesting=2) def slack_source( page_size: int = MAX_PAGE_SIZE, @@ -147,6 +147,7 @@ def slack_source( end_date: Optional[TAnyDateTime] = None, selected_channels: Optional[List[str]] = dlt.config.value, ) -> Iterable[DltResource]: + ... ``` `page_size`: Maximum items per page (default: 1000). @@ -161,27 +162,29 @@ def slack_source( ### Resource `channels` -This function yields all the channels data as `dlt` resource. +This function yields all the channels data as a `dlt` resource. -```python +```py @dlt.resource(name="channels", primary_key="id", write_disposition="replace") def channels_resource() -> Iterable[TDataItem]: + ... ``` ### Resource `users` -This function yields all the users data as `dlt` resource. +This function yields all the users data as a `dlt` resource. -```python +```py @dlt.resource(name="users", primary_key="id", write_disposition="replace") def users_resource() -> Iterable[TDataItem]: + ... ``` ### Resource `get_messages_resource` -This method fetches messages for a specified channel from the Slack API. It creates a resource for each channel with channel's name. +This method fetches messages for a specified channel from the Slack API. It creates a resource for each channel with the channel's name. -```python +```py def get_messages_resource( channel_data: Dict[str, Any], created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental( @@ -191,6 +194,7 @@ def get_messages_resource( allow_external_schedulers=True, ), ) -> Iterable[TDataItem]: + ... ``` `channel_data`: A dictionary detailing a specific channel to determine where messages are fetched from. @@ -209,7 +213,7 @@ def get_messages_resource( This method retrieves access logs from the Slack API. -```python +```py @dlt.resource( name="access_logs", selected=False, @@ -218,6 +222,7 @@ This method retrieves access logs from the Slack API. ) # it is not an incremental resource it just has a end_date filter def logs_resource() -> Iterable[TDataItem]: + ... ``` `selected`: A boolean set to False, indicating the resource isn't loaded by default. @@ -235,7 +240,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="slack", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -244,7 +249,7 @@ verified source. ``` 1. To load Slack resources from the specified start date: - ```python + ```py source = slack_source(page_size=1000, start_date=datetime(2023, 9, 1), end_date=datetime(2023, 9, 8)) # Enable below to load only 'access_logs', available for paid accounts only. @@ -258,7 +263,7 @@ verified source. 1. To load data from selected Slack channels from the specified start date: - ```python + ```py # To load data from selected channels. selected_channels=["general", "random"] # Enter the channel names here. @@ -275,7 +280,7 @@ verified source. 1. To load only messages from selected Slack resources: - ```python + ```py # To load data from selected channels. selected_channels=["general", "random"] # Enter the channel names here. @@ -285,10 +290,10 @@ verified source. start_date=datetime(2023, 9, 1), end_date=datetime(2023, 9, 8), ) - # It loads only massages from the channel "general". + # It loads only messages from the channel "general". 
load_info = pipeline.run(source.with_resources("general")) print(load_info) ``` - \ No newline at end of file + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md index 67965863ce..56fc826ce8 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md @@ -58,8 +58,8 @@ The database above doesn't require a password. The connection URL can be broken down into: -```python -connection_url = "connection_string = f"{drivername}://{username}:{password}@{host}:{port}/{database}" +```py +connection_url = connection_string = f"{drivername}://{username}:{password}@{host}:{port}{database}" ``` `drivername`: Indicates both the database system and driver used. @@ -116,7 +116,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init sql_database duckdb ``` @@ -158,7 +158,7 @@ For more information, read the guide on [how to add a verified source](../../wal 1. You can also pass credentials in the pipeline script the following way: - ```python + ```py credentials = ConnectionStringCredentials( "mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam" ) @@ -176,19 +176,19 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Install the necessary dependencies by running the following command: - ```bash + ```sh pip install -r requirements.txt ``` 1. Run the verified source by entering: - ```bash + ```sh python sql_database_pipeline.py ``` 1. Make sure that everything is loaded as expected with: - ```bash + ```sh dlt pipeline show ``` @@ -208,7 +208,7 @@ For more information, read the [General Usage: Credentials.](../../general-usage This function loads data from an SQL database via SQLAlchemy and auto-creates resources for each table or from a specified list of tables. -```python +```py @dlt.source def sql_database( credentials: Union[ConnectionStringCredentials, Engine, str] = dlt.secrets.value, @@ -220,6 +220,7 @@ def sql_database( defer_table_reflect: Optional[bool] = dlt.config.value, table_adapter_callback: Callable[[Table], None] = None, ) -> Iterable[DltResource]: + ... ``` `credentials`: Database details or an 'sqlalchemy.Engine' instance. @@ -244,7 +245,7 @@ remove certain columns to be selected. This function loads data from specific database tables. -```python +```py @dlt.common.configuration.with_config( sections=("sources", "sql_database"), spec=SqlTableResourceConfiguration ) @@ -259,6 +260,7 @@ def sql_table( defer_table_reflect: Optional[bool] = dlt.config.value, table_adapter_callback: Callable[[Table], None] = None, ) -> DltResource: + ... ``` `incremental`: Optional, enables incremental loading. @@ -284,7 +286,7 @@ certain range. 1. Consider a table with a `last_modified` timestamp column. By setting this column as your cursor and specifying an initial value, the loader generates a SQL query filtering rows with `last_modified` values greater than the specified initial value. - ```python + ```py from sql_database import sql_table from datetime import datetime @@ -303,7 +305,7 @@ certain range. 1. 
To incrementally load the "family" table using the sql_database source method: - ```python + ```py source = sql_database().with_resources("family") #using the "updated" field as an incremental field using initial value of January 1, 2022, at midnight source.family.apply_hints(incremental=dlt.sources.incremental("updated"),initial_value=pendulum.DateTime(2022, 1, 1, 0, 0, 0)) @@ -315,7 +317,7 @@ certain range. 1. To incrementally load the "family" table using the 'sql_table' resource. - ```python + ```py family = sql_table( table="family", incremental=dlt.sources.incremental( @@ -342,7 +344,7 @@ When running on Airflow ### Parallel extraction You can extract each table in a separate thread (no multiprocessing at this point). This will decrease loading time if your queries take time to execute or your network latency/speed is low. -```python +```py database = sql_database().parallelize() table = sql_table().parallelize() ``` @@ -358,7 +360,7 @@ To create your own pipeline, use source and resource methods from this verified 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="rfam", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -370,7 +372,7 @@ To create your own pipeline, use source and resource methods from this verified 1. To load the entire database, use the `sql_database` source as: - ```python + ```py source = sql_database() info = pipeline.run(source, write_disposition="replace") print(info) @@ -378,7 +380,7 @@ To create your own pipeline, use source and resource methods from this verified 1. If you just need the "family" table, use: - ```python + ```py source = sql_database().with_resources("family") #running the pipeline info = pipeline.run(source, write_disposition="replace") @@ -389,7 +391,7 @@ To create your own pipeline, use source and resource methods from this verified [documentation](https://dlthub.com/docs/general-usage/customising-pipelines/pseudonymizing_columns). As an example, here's how to pseudonymize the "rfam_acc" column in the "family" table: - ```python + ```py import hashlib def pseudonymize_name(doc): @@ -421,7 +423,7 @@ To create your own pipeline, use source and resource methods from this verified 1. To exclude columns, such as the "rfam_id" column from the "family" table before loading: - ```python + ```py def remove_columns(doc): del doc["rfam_id"] return doc diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md b/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md index 4ddf20aa78..0ac1fe7acf 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md @@ -50,7 +50,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init strapi duckdb ``` @@ -73,7 +73,7 @@ For more information, read the guide on [how to add a verified source](../../wal information securely, like access tokens. Keep this file safe. Here's its format for service account authentication: - ```python + ```py # put your secret values and credentials here. do not share this file and do not push it to github [sources.strapi] api_secret_key = "api_secret_key" # please set me up! @@ -96,13 +96,13 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. 
Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python strapi_pipeline.py ``` @@ -113,7 +113,7 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -131,13 +131,14 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function retrives data from Strapi. -```python +```py @dlt.source def strapi_source( endpoints: List[str], api_secret_key: str = dlt.secrets.value, domain: str = dlt.secrets.value, ) -> Iterable[DltResource]: + ... ``` `endpoints`: Collections to fetch data from. @@ -155,7 +156,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="strapi", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -165,7 +166,7 @@ verified source. 1. To load the specified endpoints: - ```python + ```py endpoints = ["athletes"] load_data = strapi_source(endpoints=endpoints) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md b/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md index 0b172dc3be..118c0e6511 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md @@ -56,7 +56,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init stripe_analytics duckdb ``` @@ -96,20 +96,20 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python stripe_analytics_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -127,7 +127,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug You can write your own pipelines to load data to a destination using this verified source. However, it is important to note is how the `ENDPOINTS` and `INCREMENTAL_ENDPOINTS` tuples are defined in `stripe_analytics/settings.py`. -```python +```py # The most popular Stripe API's endpoints ENDPOINTS = ("Subscription", "Account", "Coupon", "Customer", "Product", "Price") # Possible incremental endpoints @@ -140,7 +140,7 @@ INCREMENTAL_ENDPOINTS = ("Event", "Invoice", "BalanceTransaction") This function retrieves data from the Stripe API for the specified endpoint: -```python +```py @dlt.source def stripe_source( endpoints: Tuple[str, ...] = ENDPOINTS, @@ -148,6 +148,7 @@ def stripe_source( start_date: Optional[DateTime] = None, end_date: Optional[DateTime] = None, ) -> Iterable[DltResource]: + ... ``` - `endpoints`: Tuple containing endpoint names. @@ -159,7 +160,7 @@ def stripe_source( This source loads data in 'append' mode from incremental endpoints. 
-```python +```py @dlt.source def incremental_stripe_source( endpoints: Tuple[str, ...] = INCREMENTAL_ENDPOINTS, @@ -167,6 +168,7 @@ def incremental_stripe_source( initial_start_date: Optional[DateTime] = None, end_date: Optional[DateTime] = None, ) -> Iterable[DltResource]: + ... ``` `endpoints`: Tuple containing incremental endpoint names. @@ -183,9 +185,10 @@ For more information, read the [General Usage: Incremental loading](../../genera This function loads a dictionary with calculated metrics, including MRR and Churn rate, along with the current timestamp. -```python +```py @dlt.resource(name="Metrics", write_disposition="append", primary_key="created") def metrics_resource() -> Iterable[TDataItem]: + ... ``` Abrevations MRR and Churn rate are as follows: @@ -203,7 +206,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="stripe_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -213,7 +216,7 @@ verified source. 1. To load endpoints like "Plan" and "Charge" in replace mode, retrieve all data for the year 2022: - ```python + ```py source_single = stripe_source( endpoints=("Plan", "Charge"), start_date=datetime(2022, 1, 1), @@ -225,7 +228,7 @@ verified source. 1. To load data from the "Invoice" endpoint, which has static data, using incremental loading: - ```python + ```py # Load all data on the first run that was created after start_date and before end_date source_incremental = incremental_stripe_source( endpoints=("Invoice", ), @@ -239,7 +242,7 @@ verified source. 1. To load data created after December 31, 2022, adjust the data range for stripe_source to prevent redundant loading. For incremental_stripe_source, the initial_start_date will auto-update to the last loaded date from the previous run. - ```python + ```py source_single = stripe_source( endpoints=("Plan", "Charge"), start_date=datetime(2022, 12, 31), @@ -254,7 +257,7 @@ verified source. 1. To load important metrics and store them in database: - ```python + ```py # Event is an endpoint with uneditable data, so we can use 'incremental_stripe_source'. source_event = incremental_stripe_source(endpoints=("Event",)) # Subscription is an endpoint with editable data, use stripe_source. diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md index 8701db7db8..dc4c1936f9 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md @@ -65,7 +65,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init workable duckdb ``` @@ -117,20 +117,20 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python workable_pipeline.py ``` 1. 
Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -146,7 +146,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug Note the default definitions of DEFAULT_ENDPOINTS and DEFAULT_DETAILS in "workable/settings.py". -```python +```py DEFAULT_ENDPOINTS = ("members", "recruiters", "stages", "requisitions", "jobs", "custom_attributes","events") DEFAULT_DETAILS = { @@ -164,7 +164,7 @@ endpoints allow incremental 'merge' mode loading. This source returns a sequence of dltResources that correspond to the endpoints. -```python +```py @dlt.source(name="workable") def workable_source( access_token: str = dlt.secrets.value, @@ -172,6 +172,7 @@ def workable_source( start_date: Optional[DateTime] = None, load_details: bool = False, ) -> Iterable[DltResource]: + ... ``` `access_token`: Authenticate the Workable API using the token specified in ".dlt/secrets.toml". @@ -187,13 +188,14 @@ def workable_source( This function is used to retrieve "candidates" endpoints. -```python +```py @dlt.resource(name="candidates", write_disposition="merge", primary_key="id") def candidates_resource( updated_at: Optional[Any] = dlt.sources.incremental( "updated_at", initial_value=workable.start_date_iso ) ) -> Iterable[TDataItem]: + ... ``` `updated_at`: Uses the dlt.sources.incremental method. Defaults to the function's start_date or Jan @@ -211,7 +213,7 @@ To create your data pipeline using single loading and 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="workable", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -221,7 +223,7 @@ To create your data pipeline using single loading and 1. To load all data: - ```python + ```py load_data = workable_source() load_info = pipeline.run(load_data) print(load_info) @@ -232,7 +234,7 @@ To create your data pipeline using single loading and 1. To load data from a specific date, including dependent endpoints: - ```python + ```py load_data = workable_source(start_date=datetime(2022, 1, 1), load_details=True) load_info = pipeline.run(load_data) print(load_info) @@ -244,8 +246,8 @@ To create your data pipeline using single loading and 1. To load custom endpoints “candidates” and “members”: - ```python - load_info = pipeline.run(load_data.with_resources("candidates", "members") + ```py + load_info = pipeline.run(load_data.with_resources("candidates", "members")) # print the information on data that was loaded print(load_info) ``` @@ -255,7 +257,7 @@ To create your data pipeline using single loading and 1. To load data from the “jobs” endpoint and its dependent endpoints like "activities" and "application_form": - ```python + ```py load_data = workable_source(start_date=datetime(2022, 2, 1), load_details=True) # Set the load_details as True to load all the dependent endpoints. load_info = pipeline.run(load_data.with_resources("jobs","jobs_activities","jobs_application_form")) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md b/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md index 234483dca0..11567306d9 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md @@ -84,7 +84,7 @@ Here's a summarized version: 1. 
To get full token using the client id obtained above, you can follow the [instructions here.](https://developer.zendesk.com/documentation/ticketing/working-with-oauth/creating-and-using-oauth-tokens-with-the-api/#creating-the-access-token) - ```curl + ```sh curl https://{subdomain}.zendesk.com/api/v2/oauth/tokens.json \ -X POST \ -v -u {email_address}:{password} \ @@ -129,7 +129,7 @@ To generate Zendesk chat OAuth token, please refer to this 1. Record the "CLIENT_ID" and "SUBDOMAIN". 1. Format the below URL with your own CLIENT_ID and SUBDOMAIN, paste it into a new browser tab, and press Enter. - ```bash + ```sh https://www.zopim.com/oauth2/authorizations/new?response_type=token&client_id=CLIENT_ID&scope=read%20write&subdomain=SUBDOMAIN ``` 1. The call will be made, possibly asking you to log in and select 'Allow' to generate the token. @@ -160,7 +160,7 @@ To get started with your data pipeline, follow these steps: 1. Enter the following command: - ```bash + ```sh dlt init zendesk duckdb ``` @@ -183,7 +183,7 @@ For more information, read the guide on [how to add a verified source.](../../wa information securely, like access tokens. Keep this file safe. Here's its format for service account authentication: - ```python + ```py #Zendesk support credentials [sources.zendesk.credentials] subdomain = "subdomain" # Zendesk subdomain @@ -215,20 +215,20 @@ For more information, read the [General Usage: Credentials.](../../general-usage 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: - ```bash + ```sh pip install -r requirements.txt ``` 1. You're now ready to run the pipeline! To get started, run the following command: - ```bash + ```sh python zendesk_pipeline.py ``` 1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: - ```bash + ```sh dlt pipeline show ``` @@ -246,13 +246,14 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug This function retrieves data from Zendesk Talk for phone calls and voicemails. -```python +```py @dlt.source(max_table_nesting=2) def zendesk_talk( credentials: TZendeskCredentials = dlt.secrets.value, start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE, end_date: Optional[TAnyDateTime] = None, ) -> Iterable[DltResource]: + ... ``` `credentials`: Authentication credentials. @@ -266,13 +267,14 @@ run. This function loads data from Zendesk talk endpoint. -```python +```py def talk_resource( zendesk_client: ZendeskAPIClient, talk_endpoint_name: str, talk_endpoint: str, pagination_type: PaginationType, ) -> Iterator[TDataItem]: + ... ``` `zendesk_client`: An instance of ZendeskAPIClient for making API calls to Zendesk Talk. @@ -305,7 +307,7 @@ verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: - ```python + ```py pipeline = dlt.pipeline( pipeline_name="dlt_zendesk_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) @@ -315,7 +317,7 @@ verified source. 1. To load data related to support, talk and chat: - ```python + ```py #zendesk support source function data_support = zendesk_support(load_all=True) # zendesk chat source function @@ -324,23 +326,23 @@ verified source. data_talk = zendesk_talk() # run pipeline with all 3 sources info = pipeline.run([data_support,data_chat,data_talk]) - return info + print(info) ``` 1. 
To load data related to support, chat and talk in incremental mode: - ```python - pipeline = dlt.pipeline( - pipeline_name="dlt_zendesk_pipeline", # Use a custom name if desired - destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) - full_refresh = Fasle - dataset_name="sample_zendesk_data" # Use a custom name if desired + ```py + pipeline = dlt.pipeline( + pipeline_name="dlt_zendesk_pipeline", # Use a custom name if desired + destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) + full_refresh = False, + dataset_name="sample_zendesk_data" # Use a custom name if desired ) - data = zendesk_support(load_all=True, start_date=start_date) - data_chat = zendesk_chat(start_date=start_date) - data_talk = zendesk_talk(start_date=start_date) - info = pipeline.run(data=[data, data_chat, data_talk]) - return info + data = zendesk_support(load_all=True, start_date=start_date) + data_chat = zendesk_chat(start_date=start_date) + data_talk = zendesk_talk(start_date=start_date) + info = pipeline.run(data=[data, data_chat, data_talk]) + print(info) ``` > Supports incremental loading for Support, Chat, and Talk Endpoints. By default, it fetches data @@ -350,7 +352,7 @@ verified source. 1. To load historical data in weekly ranges from Jan 1st, 2023, then switch to incremental loading for new tickets. - ```python + ```py # Load ranges of dates to load between January 1st 2023 and today min_start_date = pendulum.DateTime(year=2023, month=1, day=1).in_timezone("UTC") max_end_date = pendulum.today() diff --git a/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md b/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md index c61805423b..ffe0abd082 100644 --- a/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md +++ b/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md @@ -12,7 +12,7 @@ To do so, run the [cli command](../../reference/command-line-interface.md#show-t below with your pipeline name. The pipeline name is the name of the Python file where your pipeline is defined and also displayed in your terminal when loading: -```bash +```sh dlt pipeline {pipeline_name} show ``` @@ -33,7 +33,7 @@ pipeline and hide many intricacies of correctly setting up the connection to you Execute any SQL query and get results following the Python [dbapi](https://peps.python.org/pep-0249/) spec. Below we fetch data from the customers table: -```python +```py pipeline = dlt.pipeline(destination="bigquery", dataset_name="crm") with pipeline.sql_client() as client: with client.execute_query( @@ -54,7 +54,7 @@ natively (i.e. BigQuery and DuckDB), `dlt` uses the native method. Thanks to tha frames may be really fast! The example below reads GitHub reactions data from the `issues` table and counts reaction types. -```python +```py pipeline = dlt.pipeline( pipeline_name="github_pipeline", destination="duckdb", @@ -79,14 +79,14 @@ The native connection to your destination like BigQuery `Client` or DuckDB `Duck available in case you want to do anything special. 
Below we take the native connection to `duckdb` to get `DuckDBPyRelation` from a query: -```python +```py import dlt import duckdb pipeline = dlt.pipeline(destination="duckdb", dataset_name="github_reactions") with pipeline.sql_client() as client: conn = client.native_connection - rel = conn.sql('SELECT * FROM issues'); + rel = conn.sql('SELECT * FROM issues') rel.limit(3).show() ``` diff --git a/docs/website/docs/examples/chess_production/index.md b/docs/website/docs/examples/chess_production/index.md index d80558e745..ac305e943b 100644 --- a/docs/website/docs/examples/chess_production/index.md +++ b/docs/website/docs/examples/chess_production/index.md @@ -179,7 +179,7 @@ def load_data_with_retry(pipeline, data): :::warning To run this example you need to provide Slack incoming hook in `.dlt/secrets.toml`: -```python +```py [runtime] slack_incoming_hook="https://hooks.slack.com/services/***" ``` diff --git a/docs/website/docs/examples/custom_destination_bigquery/__init__.py b/docs/website/docs/examples/custom_destination_bigquery/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/website/docs/examples/custom_destination_bigquery/code/.dlt/config.toml b/docs/website/docs/examples/custom_destination_bigquery/code/.dlt/config.toml new file mode 100644 index 0000000000..be627e6c11 --- /dev/null +++ b/docs/website/docs/examples/custom_destination_bigquery/code/.dlt/config.toml @@ -0,0 +1,2 @@ +# @@@DLT_SNIPPET_START example +# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/custom_destination_bigquery/code/.dlt/example.secrets.toml b/docs/website/docs/examples/custom_destination_bigquery/code/.dlt/example.secrets.toml new file mode 100644 index 0000000000..71f41f9878 --- /dev/null +++ b/docs/website/docs/examples/custom_destination_bigquery/code/.dlt/example.secrets.toml @@ -0,0 +1,10 @@ +# @@@DLT_SNIPPET_START example +[destination.bigquery.credentials] +client_email = "" +private_key = "" +project_id = "" +token_uri = "" +refresh_token = "" +client_id = "" +client_secret = "" +# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/custom_destination_bigquery/code/__init__.py b/docs/website/docs/examples/custom_destination_bigquery/code/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/website/docs/examples/custom_destination_bigquery/code/custom_destination_bigquery-snippets.py b/docs/website/docs/examples/custom_destination_bigquery/code/custom_destination_bigquery-snippets.py new file mode 100644 index 0000000000..16ff9c22b8 --- /dev/null +++ b/docs/website/docs/examples/custom_destination_bigquery/code/custom_destination_bigquery-snippets.py @@ -0,0 +1,81 @@ +from tests.utils import skipifgithubfork +from tests.pipeline.utils import assert_load_info + + +@skipifgithubfork +def custom_destination_biquery_snippet() -> None: + # @@@DLT_SNIPPET_START example + import dlt + import pandas as pd + import pyarrow as pa + from google.cloud import bigquery + + from dlt.common.configuration.specs import GcpServiceAccountCredentials + + # constants + OWID_DISASTERS_URL = ( + "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/" + "Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020)/" + "Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020).csv" + ) + # this table needs to be manually created in your gc account + # format: "your-project.your_dataset.your_table" + BIGQUERY_TABLE_ID = "chat-analytics-rasa-ci.ci_streaming_insert.natural-disasters" + + # dlt sources + 
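+    # the resource below reads the OWID disasters CSV with pandas, converts it to a
+    # pyarrow table and appends list and struct columns to exercise complex BigQuery types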
@dlt.resource(name="natural_disasters") + def resource(url: str): + # load pyarrow table with pandas + table = pa.Table.from_pandas(pd.read_csv(url)) + # we add a list type column to demontrate bigquery lists + table = table.append_column( + "tags", + pa.array( + [["disasters", "earthquakes", "floods", "tsunamis"]] * len(table), + pa.list_(pa.string()), + ), + ) + # we add a struct type column to demonstrate bigquery structs + table = table.append_column( + "meta", + pa.array( + [{"loaded_by": "dlt"}] * len(table), + pa.struct([("loaded_by", pa.string())]), + ), + ) + yield table + + # dlt biquery custom destination + # we can use the dlt provided credentials class + # to retrieve the gcp credentials from the secrets + @dlt.destination(name="bigquery", loader_file_format="parquet", batch_size=0) + def bigquery_insert( + items, table, credentials: GcpServiceAccountCredentials = dlt.secrets.value + ) -> None: + client = bigquery.Client( + credentials.project_id, credentials.to_native_credentials(), location="US" + ) + job_config = bigquery.LoadJobConfig( + autodetect=True, + source_format=bigquery.SourceFormat.PARQUET, + schema_update_options=bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ) + # since we have set the batch_size to 0, we get a filepath and can load the file directly + with open(items, "rb") as f: + load_job = client.load_table_from_file(f, BIGQUERY_TABLE_ID, job_config=job_config) + load_job.result() # Waits for the job to complete. + + __name__ = "__main__" # @@@DLT_REMOVE + if __name__ == "__main__": + # run the pipeline and print load results + pipeline = dlt.pipeline( + pipeline_name="csv_to_bigquery_insert", + destination=bigquery_insert, + dataset_name="mydata", + full_refresh=True, + ) + load_info = pipeline.run(resource(url=OWID_DISASTERS_URL)) + + print(load_info) + # @@@DLT_SNIPPET_END example + assert_load_info(load_info) diff --git a/docs/website/docs/examples/custom_destination_bigquery/index.md b/docs/website/docs/examples/custom_destination_bigquery/index.md new file mode 100644 index 0000000000..0531da23b1 --- /dev/null +++ b/docs/website/docs/examples/custom_destination_bigquery/index.md @@ -0,0 +1,119 @@ +--- +title: Custom destination with BigQuery +description: Learn how use the custom destination to load to bigquery and use credentials +keywords: [destination, credentials, example, bigquery, custom destination] +--- + +import Header from '../_examples-header.md'; + +
+ +## Custom destination BigQuery pipeline + +In this example, you'll find a Python script that demonstrates how to load Google Sheets data using the `dlt` library. + +We'll learn how to: +- use [built-in credentials](../../general-usage/credentials/config_specs#gcp-credentials) +- use the [custom destination](../../dlt-ecosystem/destinations/destination.md) +- Use pyarrow tables to create complex column types on bigquery +- Use bigquery autodetect=True for schema inference from parquet files + +### Your bigquery credentials in secrets.toml + +```toml +# you can just paste services.json as credentials +[destination.bigquery.credentials] +client_email = "" +private_key = "" +project_id = "" +token_uri = "" +refresh_token = "" +client_id = "" +client_secret = "" +``` + + + +### Pipeline code + + +```py +import dlt +import pandas as pd +import pyarrow as pa +from google.cloud import bigquery + +from dlt.common.configuration.specs import GcpServiceAccountCredentials + +# constants +OWID_DISASTERS_URL = ( + "https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/" + "Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020)/" + "Natural%20disasters%20from%201900%20to%202019%20-%20EMDAT%20(2020).csv" +) +# this table needs to be manually created in your gc account +# format: "your-project.your_dataset.your_table" +BIGQUERY_TABLE_ID = "chat-analytics-rasa-ci.ci_streaming_insert.natural-disasters" + +# dlt sources +@dlt.resource(name="natural_disasters") +def resource(url: str): + # load pyarrow table with pandas + table = pa.Table.from_pandas(pd.read_csv(url)) + # we add a list type column to demontrate bigquery lists + table = table.append_column( + "tags", + pa.array( + [["disasters", "earthquakes", "floods", "tsunamis"]] * len(table), + pa.list_(pa.string()), + ), + ) + # we add a struct type column to demonstrate bigquery structs + table = table.append_column( + "meta", + pa.array( + [{"loaded_by": "dlt"}] * len(table), + pa.struct([("loaded_by", pa.string())]), + ), + ) + yield table + +# dlt biquery custom destination +# we can use the dlt provided credentials class +# to retrieve the gcp credentials from the secrets +@dlt.destination(name="bigquery", loader_file_format="parquet", batch_size=0) +def bigquery_insert( + items, table, credentials: GcpServiceAccountCredentials = dlt.secrets.value +) -> None: + client = bigquery.Client( + credentials.project_id, credentials.to_native_credentials(), location="US" + ) + job_config = bigquery.LoadJobConfig( + autodetect=True, + source_format=bigquery.SourceFormat.PARQUET, + schema_update_options=bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION, + ) + # since we have set the batch_size to 0, we get a filepath and can load the file directly + with open(items, "rb") as f: + load_job = client.load_table_from_file(f, BIGQUERY_TABLE_ID, job_config=job_config) + load_job.result() # Waits for the job to complete. 
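+        # note: result() raises if the load job failed, so errors surface in the dlt load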
+ +if __name__ == "__main__": + # run the pipeline and print load results + pipeline = dlt.pipeline( + pipeline_name="csv_to_bigquery_insert", + destination=bigquery_insert, + dataset_name="mydata", + full_refresh=True, + ) + load_info = pipeline.run(resource(url=OWID_DISASTERS_URL)) + + print(load_info) + + assert_load_info(load_info) +``` + diff --git a/docs/website/docs/examples/google_sheets/index.md b/docs/website/docs/examples/google_sheets/index.md index 4af35f6dac..3bf3f858d8 100644 --- a/docs/website/docs/examples/google_sheets/index.md +++ b/docs/website/docs/examples/google_sheets/index.md @@ -27,7 +27,7 @@ This example is for educational purposes. For best practices, we recommend using ### Install Google client library -```shell +```sh pip install google-api-python-client ``` diff --git a/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py b/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py index ff12a00fca..05ea18cb9e 100644 --- a/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py +++ b/docs/website/docs/examples/incremental_loading/code/zendesk-snippets.py @@ -140,4 +140,4 @@ def get_pages( # check that stuff was loaded row_counts = pipeline.last_trace.last_normalize_info.row_counts - assert row_counts["ticket_events"] == 17 \ No newline at end of file + assert row_counts["ticket_events"] == 17 diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md index b2b5ee2792..8a5c17604c 100644 --- a/docs/website/docs/examples/nested_data/index.md +++ b/docs/website/docs/examples/nested_data/index.md @@ -26,7 +26,7 @@ We'll learn how to: ### Install pymongo -```shell +```sh pip install pymongo>=4.3.3 ``` diff --git a/docs/website/docs/examples/pdf_to_weaviate/index.md b/docs/website/docs/examples/pdf_to_weaviate/index.md index cc2ef01e33..5b889b858d 100644 --- a/docs/website/docs/examples/pdf_to_weaviate/index.md +++ b/docs/website/docs/examples/pdf_to_weaviate/index.md @@ -14,7 +14,7 @@ import Header from '../_examples-header.md'; Additionally we'll use PyPDF2 to extract text from PDFs. Make sure you have it installed: -```shell +```sh pip install PyPDF2 ``` diff --git a/docs/website/docs/examples/qdrant_zendesk/index.md b/docs/website/docs/examples/qdrant_zendesk/index.md index 7920619b26..b71840073b 100644 --- a/docs/website/docs/examples/qdrant_zendesk/index.md +++ b/docs/website/docs/examples/qdrant_zendesk/index.md @@ -28,7 +28,7 @@ First, configure the destination credentials for [Qdrant](https://dlthub.com/doc Next, make sure you have the following dependencies installed: -```commandline +```sh pip install qdrant-client>=1.6.9 pip install fastembed>=0.1.1 ``` @@ -170,13 +170,13 @@ response = qdrant_client.query( The query above gives stores the following results in the `response` variable: -```json +```py [QueryResponse(id='6aeacd21-b3d0-5174-97ef-5aaa59486414', embedding=None, metadata={'_dlt_id': 'Nx3wBiL29xTgaQ', '_dlt_load_id': '1700130284.002391', 'allow_attachments': True, 'allow_channelback': False, 'assignee_id': 12765072569105, 'brand_id': 12765073054225, 'created_at': '2023-09-01T11:19:25+00:00', 'custom_status_id': 12765028278545, 'description': 'I have been trying to cancel my subscription but the system won’t let me do it. 
Can you please help?', 'from_messaging_channel': False, 'generated_timestamp': 1693567167, 'group_id': 12765036328465, 'has_incidents': False, 'id': 12, 'is_public': True, 'organization_id': 12765041119505, 'raw_subject': 'Unable to Cancel Subscription', 'requester_id': 12765072569105, 'status': 'open', 'subject': 'Unable to Cancel Subscription', 'submitter_id': 12765072569105, 'tags': ['test1'], 'test_field': 'test1', 'ticket_form_id': 12765054772497, 'updated_at': '2023-09-01T11:19:25+00:00', 'url': 'https://d3v-dlthub.zendesk.com/api/v2/tickets/12.json', 'via__channel': 'web'}, document='', score=0.89545774), QueryResponse(id='a22189c1-70ab-5421-938b-1caae3e7d6d8', embedding=None, metadata={'_dlt_id': 'bc/xloksL89EUg', '_dlt_load_id': '1700130284.002391', 'allow_attachments': True, 'allow_channelback': False, 'assignee_id': 12765072569105, 'brand_id': 12765073054225, 'created_at': '2023-07-18T17:23:42+00:00', 'custom_status_id': 12765028278545, 'description': 'ABCDEF', 'from_messaging_channel': False, 'generated_timestamp': 1689701023, 'group_id': 12765036328465, 'has_incidents': False, 'id': 4, 'is_public': True, 'organization_id': 12765041119505, 'raw_subject': 'What is this ticket', 'requester_id': 12765072569105, 'status': 'open', 'subject': 'What is this ticket', 'submitter_id': 12765072569105, 'tags': ['test1'], 'test_field': 'test1', 'ticket_form_id': 12765054772497, 'updated_at': '2023-07-18T17:23:42+00:00', 'url': 'https://d3v-dlthub.zendesk.com/api/v2/tickets/4.json', 'via__channel': 'web'}, document='', score=0.8643349), QueryResponse(id='ce2f1c5c-41c3-56c3-a31d-2399a7a9239d', embedding=None, metadata={'_dlt_id': 'ZMuFJZo0AJxV4A', '_dlt_load_id': '1700130284.002391', 'allow_attachments': True, 'allow_channelback': False, 'assignee_id': 12765072569105, 'brand_id': 12765073054225, 'created_at': '2023-03-14T10:52:28+00:00', 'custom_status_id': 12765028278545, 'description': 'X', 'from_messaging_channel': False, 'generated_timestamp': 1696163084, 'group_id': 12765036328465, 'has_incidents': False, 'id': 2, 'is_public': True, 'priority': 'high', 'raw_subject': 'SCRUBBED', 'requester_id': 13726460510097, 'status': 'deleted', 'subject': 'SCRUBBED', 'submitter_id': 12765072569105, 'tags': [], 'ticket_form_id': 13726337882769, 'type': 'question', 'updated_at': '2023-09-01T12:10:35+00:00', 'url': 'https://d3v-dlthub.zendesk.com/api/v2/tickets/2.json', 'via__channel': 'web'}, document='', score=0.8467072)] ``` To get a closer look at what the Zendesk ticket was, and how dlt dealt with it, we can index into the metadata of the first `QueryResponse` object: -```json lines +```py {'_dlt_id': 'Nx3wBiL29xTgaQ', '_dlt_load_id': '1700130284.002391', 'allow_attachments': True, diff --git a/docs/website/docs/general-usage/credentials/config_providers.md b/docs/website/docs/general-usage/credentials/config_providers.md index c0dc459da0..cf23b5d5dc 100644 --- a/docs/website/docs/general-usage/credentials/config_providers.md +++ b/docs/website/docs/general-usage/credentials/config_providers.md @@ -38,7 +38,7 @@ providers. ### Example -```python +```py @dlt.source def google_sheets( spreadsheet_id=dlt.config.value, @@ -99,6 +99,19 @@ the `private_key` for Google credentials. It will look 1. first in env variable `MY_SECTION__GCP_CREDENTIALS__PRIVATE_KEY` and if not found, 1. in `secrets.toml` with key `my_section.gcp_credentials.private_key`. 
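+
+For example, the same value can be requested explicitly in code. A minimal sketch, assuming the `my_section` layout above:
+
+```py
+import dlt
+
+# dlt.secrets looks through all configured providers (environment, secrets.toml, ...) for this key
+private_key = dlt.secrets["my_section.gcp_credentials.private_key"]
+```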
+ +:::info +While using Google secrets provider please make sure your pipeline name +contains no whitespace or any other punctuation characters except "-" and "_". + +Per Google the secret name can contain + + 1. Uppercase and lowercase letters, + 2. Numerals, + 3. Hyphens, + 4. Underscores. +::: + ### Environment provider Looks for the values in the environment variables. @@ -120,7 +133,7 @@ current Working Directory**. Example: If your working directory is `my_dlt_project` and your project has the following structure: -``` +```text my_dlt_project: | pipelines/ diff --git a/docs/website/docs/general-usage/credentials/config_specs.md b/docs/website/docs/general-usage/credentials/config_specs.md index 07e56b3e14..e93e1c466a 100644 --- a/docs/website/docs/general-usage/credentials/config_specs.md +++ b/docs/website/docs/general-usage/credentials/config_specs.md @@ -21,7 +21,7 @@ service account credentials, while `ConnectionStringCredentials` handles databas As an example, let's use `ConnectionStringCredentials` which represents a database connection string. -```python +```py from dlt.sources.credentials import ConnectionStringCredentials @dlt.source @@ -60,17 +60,17 @@ dsn.password="loader" You can explicitly provide credentials in various forms: -```python +```py query("SELECT * FROM customers", "postgres://loader@localhost:5432/dlt_data") # or -query("SELECT * FROM customers", {"database": "dlt_data", "username": "loader"...}) +query("SELECT * FROM customers", {"database": "dlt_data", "username": "loader"}) ``` ## Built in credentials We have some ready-made credentials you can reuse: -```python +```py from dlt.sources.credentials import ConnectionStringCredentials from dlt.sources.credentials import OAuth2Credentials from dlt.sources.credentials import GcpServiceAccountCredentials, GcpOAuthCredentials @@ -87,7 +87,7 @@ and additional query parameters. This class provides methods for parsing and generating connection strings. #### Usage -```python +```py credentials = ConnectionStringCredentials() # Set the necessary attributes @@ -117,7 +117,7 @@ client secret, refresh token, and access token. It also allows for the addition of scopes and provides methods for client authentication. Usage: -```python +```py credentials = OAuth2Credentials( client_id="CLIENT_ID", client_secret="CLIENT_SECRET", @@ -153,7 +153,7 @@ This class provides methods to retrieve native credentials for Google clients. - You may just pass the `service.json` as string or dictionary (in code and via config providers). - Or default credentials will be used. -```python +```py credentials = GcpServiceAccountCredentials() # Parse a native value (ServiceAccountCredentials) # Accepts a native value, which can be either an instance of ServiceAccountCredentials @@ -163,7 +163,7 @@ native_value = {"private_key": ".."} # or "path/to/services.json" credentials.parse_native_representation(native_value) ``` or more preferred use: -```python +```py import dlt from dlt.sources.credentials import GcpServiceAccountCredentials @@ -204,7 +204,7 @@ serialized OAuth client secrets JSON. This class provides methods for authentication and obtaining access tokens. 
##### Usage -```python +```py oauth_credentials = GcpOAuthCredentials() # Accepts a native value, which can be either an instance of GoogleOAuth2Credentials @@ -214,7 +214,7 @@ native_value_oauth = {"client_secret": ...} oauth_credentials.parse_native_representation(native_value_oauth) ``` or more preferred use: -```python +```py import dlt from dlt.sources.credentials import GcpOAuthCredentials @@ -277,7 +277,7 @@ It inherits the ability to manage default credentials and extends it with method for handling partial credentials and converting credentials to a botocore session. #### Usage -```python +```py credentials = AwsCredentials() # Set the necessary attributes credentials.aws_access_key_id = "ACCESS_KEY_ID" @@ -285,7 +285,7 @@ credentials.aws_secret_access_key = "SECRET_ACCESS_KEY" credentials.region_name = "us-east-1" ``` or -```python +```py # Imports an external boto3 session and sets the credentials properties accordingly. import botocore.session @@ -295,7 +295,7 @@ credentials.parse_native_representation(session) print(credentials.aws_access_key_id) ``` or more preferred use: -```python +```py @dlt.source def aws_readers( bucket_url: str = dlt.config.value, @@ -340,14 +340,14 @@ handling partial credentials and converting credentials to a format suitable for interacting with Azure Blob Storage using the adlfs library. #### Usage -```python +```py credentials = AzureCredentials() # Set the necessary attributes credentials.azure_storage_account_name = "ACCOUNT_NAME" credentials.azure_storage_account_key = "ACCOUNT_KEY" ``` or more preferred use: -```python +```py @dlt.source def azure_readers( bucket_url: str = dlt.config.value, @@ -388,7 +388,7 @@ decorated function. Example: -```python +```py @dlt.source def zen_source(credentials: Union[ZenApiKeyCredentials, ZenEmailCredentials, str] = dlt.secrets.value, some_option: bool = False): # depending on what the user provides in config, ZenApiKeyCredentials or ZenEmailCredentials will be injected in `credentials` argument @@ -432,7 +432,7 @@ This is used a lot in the `dlt` core and may become useful for complicated sourc In fact, for each decorated function a spec is synthesized. In case of `google_sheets` following class is created: -```python +```py from dlt.sources.config import configspec, with_config @configspec diff --git a/docs/website/docs/general-usage/credentials/configuration.md b/docs/website/docs/general-usage/credentials/configuration.md index 9b2d392883..ec8e5fe32a 100644 --- a/docs/website/docs/general-usage/credentials/configuration.md +++ b/docs/website/docs/general-usage/credentials/configuration.md @@ -25,7 +25,7 @@ When done right you'll be able to run the same pipeline script during developmen In the example below, the `google_sheets` source function is used to read selected tabs from Google Sheets. It takes several arguments that specify the spreadsheet, the tab names and the Google credentials to be used when extracting data. -```python +```py @dlt.source def google_sheets( spreadsheet_id=dlt.config.value, @@ -68,14 +68,14 @@ You are free to call the function above as usual and pass all the arguments in t Instead let `dlt` to do the work and leave it to [injection mechanism](#injection-mechanism) that looks for function arguments in the config files or environment variables and adds them to your explicit arguments during a function call. Below are two most typical examples: 1. 
Pass spreadsheet id and tab names in the code, inject credentials from the secrets: - ```python + ```py data_source = google_sheets("23029402349032049", ["tab1", "tab2"]) ``` `credentials` value will be injected by the `@source` decorator (e.g. from `secrets.toml`). `spreadsheet_id` and `tab_names` take values from the call arguments. 2. Inject all the arguments from config / secrets - ```python + ```py data_source = google_sheets() ``` `credentials` value will be injected by the `@source` decorator (e.g. from **secrets.toml**). @@ -97,16 +97,16 @@ Where do the configs and secrets come from? By default, `dlt` looks in two **con Secrets in **.dlt/secrets.toml**. `dlt` will look for `credentials`, ```toml [credentials] - client_email = - private_key = - project_id = + client_email = "" + private_key = "" + project_id = "" ``` Note that **credentials** will be evaluated as dictionary containing **client_email**, **private_key** and **project_id** as keys. It is standard TOML behavior. - [Environment Variables](config_providers#environment-provider): - ```python - CREDENTIALS= - SPREADSHEET_ID=1HhWHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580 - TAB_NAMES=tab1,tab2 + ```toml + CREDENTIALS="" + SPREADSHEET_ID="1HhWHjqouQnnCIZAFa2rL6vT91YRN8aIhts22SUUR580" + TAB_NAMES=["tab1", "tab2"] ``` We pass the JSON contents of `service.json` file to `CREDENTIALS` and we specify tab names as comma-delimited values. Environment variables are always in **upper case**. @@ -123,7 +123,7 @@ There are many ways you can organize your configs and secrets. The example above ### Do not hardcode secrets You should never do that. Sooner or later your private key will leak. -```python +```py # WRONG!: # provide all values directly - wrong but possible. # secret values should never be present in the code! @@ -137,7 +137,7 @@ data_source = google_sheets( ### Pass secrets in code from external providers You can get the secret values from your own providers. Below we take **credentials** for our `google_sheets` source from Airflow base hook: -```python +```py from airflow.hooks.base_hook import BaseHook # get it from airflow connections or other credential store @@ -163,7 +163,7 @@ Doing so provides several benefits: 1. You can request [built-in and custom credentials](config_specs.md) (i.e. connection strings, AWS / GCP / Azure credentials). 1. You can specify a set of possible types via `Union` i.e. OAuth or API Key authorization. -```python +```py @dlt.source def google_sheets( spreadsheet_id: str = dlt.config.value, @@ -171,7 +171,7 @@ def google_sheets( credentials: GcpServiceAccountCredentials = dlt.secrets.value, only_strings: bool = False ): - ... + ... ``` Now: @@ -189,7 +189,7 @@ In case of `GcpServiceAccountCredentials`: ## Read configs and secrets yourself `dlt.secrets` and `dlt.config` provide dictionary-like access to configuration values and secrets, respectively. -```python +```py # use `dlt.secrets` and `dlt.config` to explicitly take # those values from providers from the explicit keys data_source = google_sheets( @@ -202,14 +202,14 @@ data_source.run(destination="bigquery") ``` `dlt.config` and `dlt.secrets` behave like dictionaries from which you can request a value with any key name. `dlt` will look in all [config providers](#injection-mechanism) - TOML files, env variables etc. just like it does with the standard section layout. You can also use `dlt.config.get()` or `dlt.secrets.get()` to request value cast to a desired type. 
For example: -```python +```py credentials = dlt.secrets.get("my_section.gcp_credentials", GcpServiceAccountCredentials) ``` Creates `GcpServiceAccountCredentials` instance out of values (typically a dictionary) under **my_section.gcp_credentials** key. ### Write configs and secrets in code **dlt.config** and **dlt.secrets** can be also used as setters. For example: -```python +```py dlt.config["sheet_id"] = "23029402349032049" dlt.secrets["destination.postgres.credentials"] = BaseHook.get_connection('postgres_dsn').extra ``` @@ -263,9 +263,9 @@ Here is the simplest default layout for our `google_sheets` example. ```toml [credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" ``` **config.toml** @@ -284,9 +284,9 @@ This makes sure that `google_sheets` source does not share any secrets and confi ```toml [sources.google_sheets.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" ``` **config.toml** @@ -305,9 +305,9 @@ Use this if you want to read and pass the config/secrets yourself ```toml [my_section] - [my_section.gcp_credentials] - client_email = - private_key = +[my_section.gcp_credentials] +client_email = "" +private_key = "" ``` **config.toml** @@ -316,9 +316,9 @@ Use this if you want to read and pass the config/secrets yourself [my_section] tabs=["tab1", "tab2"] - [my_section.gcp_credentials] - # I prefer to keep my project id in config file and private key in secrets - project_id = +[my_section.gcp_credentials] +# I prefer to keep my project id in config file and private key in secrets +project_id = "" ``` ### Default layout and default key lookup during injection @@ -328,7 +328,7 @@ makes it easy to configure simple cases but also provides a room for more explic complex cases i.e. having several sources with different credentials or even hosting several pipelines in the same project sharing the same config and credentials. -``` +```text pipeline_name | |-sources @@ -368,15 +368,15 @@ Example: We use the `bigquery` destination and the `google_sheets` source. They ```toml # google sheet credentials [sources.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" # bigquery credentials [destination.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" ``` Now when `dlt` looks for destination credentials, it will start with `destination.bigquery.credentials`, eliminate `bigquery` and stop at `destination.credentials`. @@ -388,21 +388,21 @@ Example: let's be even more explicit and use a full section path possible. ```toml # google sheet credentials [sources.google_sheets.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" # google analytics credentials [sources.google_analytics.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" # bigquery credentials [destination.bigquery.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" ``` Now we can separate credentials for different sources as well. @@ -418,18 +418,18 @@ Example: the pipeline is named `ML_sheets`. 
```toml [ML_sheets.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" ``` or maximum path: ```toml [ML_sheets.sources.google_sheets.credentials] -client_email = -private_key = -project_id = +client_email = "" +private_key = "" +project_id = "" ``` ### The `sources` section @@ -455,7 +455,7 @@ Now we can finally understand the `ConfigFieldMissingException`. Let's run `chess.py` example without providing the password: -``` +```sh $ CREDENTIALS="postgres://loader@localhost:5432/dlt_data" python chess.py ... dlt.common.configuration.exceptions.ConfigFieldMissingException: Following fields are missing: ['password'] in configuration with spec PostgresCredentials diff --git a/docs/website/docs/general-usage/customising-pipelines/pseudonymizing_columns.md b/docs/website/docs/general-usage/customising-pipelines/pseudonymizing_columns.md index 3f665bd0fb..ba0b13636b 100644 --- a/docs/website/docs/general-usage/customising-pipelines/pseudonymizing_columns.md +++ b/docs/website/docs/general-usage/customising-pipelines/pseudonymizing_columns.md @@ -11,7 +11,7 @@ consistently achieve the same mapping. If instead you wish to anonymize, you can replace it with a constant. In the example below, we create a dummy source with a PII column called "name", which we replace with deterministic hashes (i.e. replacing the German umlaut). -```python +```py import dlt import hashlib diff --git a/docs/website/docs/general-usage/customising-pipelines/removing_columns.md b/docs/website/docs/general-usage/customising-pipelines/removing_columns.md index 8493ffaec5..3163062ced 100644 --- a/docs/website/docs/general-usage/customising-pipelines/removing_columns.md +++ b/docs/website/docs/general-usage/customising-pipelines/removing_columns.md @@ -14,7 +14,7 @@ Let's create a sample pipeline demonstrating the process of removing a column. 1. Create a source function that creates dummy data as follows: - ```python + ```py import dlt # This function creates a dummy data source. @@ -31,7 +31,7 @@ Let's create a sample pipeline demonstrating the process of removing a column. 1. Next, create a function to filter out columns from the data before loading it into a database as follows: - ```python + ```py from typing import Dict, List, Optional def remove_columns(doc: Dict, remove_columns: Optional[List[str]] = None) -> Dict: @@ -53,7 +53,7 @@ Let's create a sample pipeline demonstrating the process of removing a column. 1. Next, declare the columns to be removed from the table, and then modify the source as follows: - ```python + ```py # Example columns to remove: remove_columns_list = ["country_code"] @@ -67,7 +67,7 @@ Let's create a sample pipeline demonstrating the process of removing a column. ``` 1. You can optionally inspect the result: - ```python + ```py for row in data_source: print(row) #{'id': 0, 'name': 'Jane Washington 0'} @@ -77,7 +77,7 @@ Let's create a sample pipeline demonstrating the process of removing a column. 1. 
At last, create a pipeline: - ```python + ```py # Integrating with a DLT pipeline pipeline = dlt.pipeline( pipeline_name='example', diff --git a/docs/website/docs/general-usage/customising-pipelines/renaming_columns.md b/docs/website/docs/general-usage/customising-pipelines/renaming_columns.md index e58dae6d9d..04e4d33b13 100644 --- a/docs/website/docs/general-usage/customising-pipelines/renaming_columns.md +++ b/docs/website/docs/general-usage/customising-pipelines/renaming_columns.md @@ -12,7 +12,7 @@ In the example below, we create a dummy source with special characters in the na function that we intend to apply to the resource to modify its output (i.e. replacing the German umlaut): `replace_umlauts_in_dict_keys`. -```python +```py import dlt # create a dummy source with umlauts (special characters) in key names (um) diff --git a/docs/website/docs/general-usage/data-enrichments/currency_conversion_data_enrichment.md b/docs/website/docs/general-usage/data-enrichments/currency_conversion_data_enrichment.md index 6b09510f68..f8bd179422 100644 --- a/docs/website/docs/general-usage/data-enrichments/currency_conversion_data_enrichment.md +++ b/docs/website/docs/general-usage/data-enrichments/currency_conversion_data_enrichment.md @@ -77,7 +77,7 @@ currency_conversion_enrichment/ 1. Here's the resource that yields the sample data as discussed above: - ```python + ```py @dlt.resource() def enriched_data_part_two(): data_enrichment_part_one = [ @@ -113,14 +113,14 @@ API token. information securely, like access tokens. Keep this file safe. Here's its format for service account authentication: - ```python + ```py [sources] api_key= "Please set me up!" #ExchangeRate-API key ``` 1. Create the `converted_amount` function as follows: - ```python + ```py # @transformer(data_from=enriched_data_part_two) def converted_amount(record): """ @@ -210,7 +210,7 @@ API token. 1. Here, we create the pipeline and use the `add_map` functionality: - ```python + ```py # Create the pipeline pipeline = dlt.pipeline( pipeline_name="data_enrichment_two", @@ -229,7 +229,7 @@ API token. To do so, you need to add the transformer decorator at the top of the `converted_amount` function. For `pipeline.run`, you can use the following code: - ```python + ```py # using fetch_average_price as a transformer function load_info = pipeline.run( enriched_data_part_two | converted_amount, @@ -246,19 +246,19 @@ API token. 1. Install necessary dependencies for the preferred [destination](../../dlt-ecosystem/destinations/), For example, duckdb: - ``` + ```sh pip install dlt[duckdb] ``` 1. Run the pipeline with the following command: - ``` + ```sh python currency_enrichment_pipeline.py ``` 1. To ensure that everything loads as expected, use the command: - ``` + ```sh dlt pipeline show ``` diff --git a/docs/website/docs/general-usage/data-enrichments/url-parser-data-enrichment.md b/docs/website/docs/general-usage/data-enrichments/url-parser-data-enrichment.md index f4578d065f..ab71d3d1d0 100644 --- a/docs/website/docs/general-usage/data-enrichments/url-parser-data-enrichment.md +++ b/docs/website/docs/general-usage/data-enrichments/url-parser-data-enrichment.md @@ -29,7 +29,7 @@ you can use any API you prefer. 
By default the URL Parse API will return a JSON response like: -```text +```json { "authority": "urlparse.com", "domain": "urlparse.com", @@ -73,7 +73,7 @@ understanding, you may explore all three enrichments sequentially in the noteboo Alternatively, to create a data enrichment pipeline, you can start by creating the following directory structure: -```python +```text url_parser_enrichment/ ├── .dlt/ │ └── secrets.toml @@ -100,41 +100,41 @@ Let's examine a synthetic dataset created for this article. It includes: Here's the resource that yields the sample data as discussed above: -```python - import dlt +```py + import dlt - @dlt.resource(write_disposition="append") - def tracked_data(): - """ - A generator function that yields a series of dictionaries, each representing - user tracking data. + @dlt.resource(write_disposition="append") + def tracked_data(): + """ + A generator function that yields a series of dictionaries, each representing + user tracking data. - This function is decorated with `dlt.resource` to integrate into the DLT (Data - Loading Tool) pipeline. The `write_disposition` parameter is set to "append" to - ensure that data from this generator is appended to the existing data in the - destination table. + This function is decorated with `dlt.resource` to integrate into the DLT (Data + Loading Tool) pipeline. The `write_disposition` parameter is set to "append" to + ensure that data from this generator is appended to the existing data in the + destination table. - Yields: - dict: A dictionary with keys 'user_id', 'device_name', and 'page_referer', - representing the user's tracking data including their device and the page - they were referred from. - """ + Yields: + dict: A dictionary with keys 'user_id', 'device_name', and 'page_referer', + representing the user's tracking data including their device and the page + they were referred from. + """ - # Sample data representing tracked user data - sample_data = [ + # Sample data representing tracked user data + sample_data = [ { "user_id": 1, "device_name": "Sony Experia XZ", "page_referer": "https://b2venture.lightning.force.com/" }, - """ - Data for other users - """ - ] - - # Yielding each user's data as a dictionary - for user_data in sample_data: - yield user_data + """ + Data for other users + """ + ] + + # Yielding each user's data as a dictionary + for user_data in sample_data: + yield user_data ``` ### 2. Create `url_parser` function @@ -143,7 +143,7 @@ We use a free service called [URL Parse API](https://urlparse.com/), to parse th need to register to use this service neither get an API key. 1. Create a `url_parser` function as follows: - ```python + ```py # @dlt.transformer(data_from=tracked_data) def url_parser(record): """ @@ -195,7 +195,7 @@ need to register to use this service neither get an API key. 1. Here, we create the pipeline and use the `add_map` functionality: - ```python + ```py # Create the pipeline pipeline = dlt.pipeline( pipeline_name="data_enrichment_three", @@ -214,7 +214,7 @@ need to register to use this service neither get an API key. do so, you need to add the transformer decorator at the top of the `url_parser` function. For `pipeline.run`, you can use the following code: - ```python + ```py # using fetch_average_price as a transformer function load_info = pipeline.run( tracked_data | url_parser, @@ -230,19 +230,19 @@ need to register to use this service neither get an API key. 1. 
Install necessary dependencies for the preferred [destination](https://dlthub.com/docs/dlt-ecosystem/destinations/), For example, duckdb: - ``` + ```sh pip install dlt[duckdb] ``` 1. Run the pipeline with the following command: - ``` + ```sh python url_enrichment_pipeline.py ``` 1. To ensure that everything loads as expected, use the command: - ``` + ```sh dlt pipeline show ``` diff --git a/docs/website/docs/general-usage/data-enrichments/user_agent_device_data_enrichment.md b/docs/website/docs/general-usage/data-enrichments/user_agent_device_data_enrichment.md index 8b33a852a8..6b07845689 100644 --- a/docs/website/docs/general-usage/data-enrichments/user_agent_device_data_enrichment.md +++ b/docs/website/docs/general-usage/data-enrichments/user_agent_device_data_enrichment.md @@ -41,7 +41,7 @@ Here's the link to the notebook: ### B. Create a pipeline Alternatively, to create a data enrichment pipeline, you can start by creating the following directory structure: -```python +```text user_device_enrichment/ ├── .dlt/ │ └── secrets.toml @@ -67,42 +67,42 @@ user_device_enrichment/ Here's the resource that yields the sample data as discussed above: - ```python - import dlt - - @dlt.resource(write_disposition="append") - def tracked_data(): - """ - A generator function that yields a series of dictionaries, each representing - user tracking data. - - This function is decorated with `dlt.resource` to integrate into the DLT (Data - Loading Tool) pipeline. The `write_disposition` parameter is set to "append" to - ensure that data from this generator is appended to the existing data in the - destination table. - - Yields: - dict: A dictionary with keys 'user_id', 'device_name', and 'page_referer', - representing the user's tracking data including their device and the page - they were referred from. - """ - - # Sample data representing tracked user data - sample_data = [ - {"user_id": 1, "device_name": "Sony Experia XZ", "page_referer": - "https://b2venture.lightning.force.com/"}, - {"user_id": 2, "device_name": "Samsung Galaxy S23 Ultra 5G", - "page_referer": "https://techcrunch.com/2023/07/20/can-dlthub-solve-the-python-library-problem-for-ai-dig-ventures-thinks-so/"}, - {"user_id": 3, "device_name": "Apple iPhone 14 Pro Max", - "page_referer": "https://dlthub.com/success-stories/freelancers-perspective/"}, - {"user_id": 4, "device_name": "OnePlus 11R", - "page_referer": "https://www.reddit.com/r/dataengineering/comments/173kp9o/ideas_for_data_validation_on_data_ingestion/"}, - {"user_id": 5, "device_name": "Google Pixel 7 Pro", "page_referer": "https://pypi.org/"}, - ] - - # Yielding each user's data as a dictionary - for user_data in sample_data: - yield user_data + ```py + import dlt + + @dlt.resource(write_disposition="append") + def tracked_data(): + """ + A generator function that yields a series of dictionaries, each representing + user tracking data. + + This function is decorated with `dlt.resource` to integrate into the DLT (Data + Loading Tool) pipeline. The `write_disposition` parameter is set to "append" to + ensure that data from this generator is appended to the existing data in the + destination table. + + Yields: + dict: A dictionary with keys 'user_id', 'device_name', and 'page_referer', + representing the user's tracking data including their device and the page + they were referred from. 
+ """ + + # Sample data representing tracked user data + sample_data = [ + {"user_id": 1, "device_name": "Sony Experia XZ", "page_referer": + "https://b2venture.lightning.force.com/"}, + {"user_id": 2, "device_name": "Samsung Galaxy S23 Ultra 5G", + "page_referer": "https://techcrunch.com/2023/07/20/can-dlthub-solve-the-python-library-problem-for-ai-dig-ventures-thinks-so/"}, + {"user_id": 3, "device_name": "Apple iPhone 14 Pro Max", + "page_referer": "https://dlthub.com/success-stories/freelancers-perspective/"}, + {"user_id": 4, "device_name": "OnePlus 11R", + "page_referer": "https://www.reddit.com/r/dataengineering/comments/173kp9o/ideas_for_data_validation_on_data_ingestion/"}, + {"user_id": 5, "device_name": "Google Pixel 7 Pro", "page_referer": "https://pypi.org/"}, + ] + + # Yielding each user's data as a dictionary + for user_data in sample_data: + yield user_data ``` ### 2. Create `fetch_average_price` function @@ -118,7 +118,7 @@ The first step is to register on [SerpAPI](https://serpapi.com/) and obtain the information securely, like access tokens. Keep this file safe. Here's its format for service account authentication: - ```python + ```py [sources] api_key= "Please set me up!" #Serp Api key. ``` @@ -126,7 +126,7 @@ The first step is to register on [SerpAPI](https://serpapi.com/) and obtain the 1. Replace the value of the `api_key`. 1. Create `fetch_average_price()` function as follows: - ```python + ```py import datetime import requests @@ -247,7 +247,7 @@ The first step is to register on [SerpAPI](https://serpapi.com/) and obtain the 1. Here, we create the pipeline and use the `add_map` functionality: - ```python + ```py # Create the pipeline pipeline = dlt.pipeline( pipeline_name="data_enrichment_one", @@ -266,7 +266,7 @@ The first step is to register on [SerpAPI](https://serpapi.com/) and obtain the do so, you need to add the transformer decorator at the top of the `fetch_average_price` function. For `pipeline.run`, you can use the following code: - ```python + ```py # using fetch_average_price as a transformer function load_info = pipeline.run( tracked_data | fetch_average_price, @@ -283,19 +283,19 @@ The first step is to register on [SerpAPI](https://serpapi.com/) and obtain the 1. Install necessary dependencies for the preferred [destination](https://dlthub.com/docs/dlt-ecosystem/destinations/), For example, duckdb: - ``` + ```sh pip install dlt[duckdb] ``` 1. Run the pipeline with the following command: - ``` + ```sh python device_enrichment_pipeline.py ``` 1. To ensure that everything loads as expected, use the command: - ``` + ```sh dlt pipeline show ``` diff --git a/docs/website/docs/general-usage/destination.md b/docs/website/docs/general-usage/destination.md index c20aa62d16..3f5eab479e 100644 --- a/docs/website/docs/general-usage/destination.md +++ b/docs/website/docs/general-usage/destination.md @@ -75,7 +75,7 @@ azure_storage_account_key="storage key" ``` or via environment variables: -``` +```sh DESTINATION__FILESYSTEM__BUCKET_URL=az://dlt-azure-bucket DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_NAME=dltdata DESTINATION__FILESYSTEM__CREDENTIALS__AZURE_STORAGE_ACCOUNT_KEY="storage key" @@ -171,5 +171,7 @@ load_info.raise_on_failed_jobs() ::: -## Declare external destination -You can implement [your own destination](../walkthroughs/create-new-destination.md) and pass the destination class type or instance to `dlt` pipeline. \ No newline at end of file +## Create new destination +You have two ways to implement a new destination: +1. 
You can use the `@dlt.destination` decorator and [implement a sink function](../dlt-ecosystem/destinations/destination.md). This is a perfect way to implement reverse ETL destinations that push data back to REST APIs. +2. You can implement [a full destination](../walkthroughs/create-new-destination.md) where you have full control over load jobs and schema migration. diff --git a/docs/website/docs/general-usage/full-loading.md b/docs/website/docs/general-usage/full-loading.md index 4651d156f0..320d0664f5 100644 --- a/docs/website/docs/general-usage/full-loading.md +++ b/docs/website/docs/general-usage/full-loading.md @@ -13,7 +13,7 @@ that are not selected while performing a full load will not replace any data in To perform a full load on one or more of your resources, choose the `write_disposition='replace'` for this resource: -```python +```py p = dlt.pipeline(destination="bigquery", dataset_name="github") issues = [] reactions = ["%2B1", "-1", "smile", "tada", "thinking_face", "heart", "rocket", "eyes"] diff --git a/docs/website/docs/general-usage/incremental-loading.md b/docs/website/docs/general-usage/incremental-loading.md index 37b5963431..fe3bb8b61d 100644 --- a/docs/website/docs/general-usage/incremental-loading.md +++ b/docs/website/docs/general-usage/incremental-loading.md @@ -64,7 +64,7 @@ child tables. Example below loads all the GitHub events and updates them in the destination using "id" as primary key, making sure that only a single copy of event is present in `github_repo_events` table: -```python +```py @dlt.resource(primary_key="id", write_disposition="merge") def github_repo_events(): yield from _get_event_pages() @@ -72,26 +72,28 @@ You can use compound primary keys: -```python +```py @dlt.resource(primary_key=("id", "url"), write_disposition="merge") -... +def resource(): + ... ``` By default, `primary_key` deduplication is arbitrary. You can pass the `dedup_sort` column hint with a value of `desc` or `asc` to influence which record remains after deduplication. Using `desc`, the records sharing the same `primary_key` are sorted in descending order before deduplication, making sure the record with the highest value for the column with the `dedup_sort` hint remains. `asc` has the opposite behavior. -```python +```py @dlt.resource( primary_key="id", write_disposition="merge", columns={"created_at": {"dedup_sort": "desc"}} # select "latest" record ) -... +def resource(): + ... ``` Example below merges on a column `batch_day` that holds the day for which given record is valid. Merge keys also can be compound: -```python +```py @dlt.resource(merge_key="batch_day", write_disposition="merge") def get_daily_batch(day): yield _get_batch_from_bucket(day) @@ -101,7 +103,7 @@ As with any other write disposition you can use it to load data ad hoc. Below we top reactions for `duckdb` repo. The lists have, obviously, many overlapping issues, but we want to keep just one instance of each. -```python +```py p = dlt.pipeline(destination="bigquery", dataset_name="github") issues = [] reactions = ["%2B1", "-1", "smile", "tada", "thinking_face", "heart", "rocket", "eyes"] @@ -117,7 +119,7 @@ Example below dispatches GitHub events to several tables by event type, keeps on by "id" and skips loading of past records using "last value" incremental. As you can see, all of this we can just declare in our resource.
-```python +```py @dlt.resource(primary_key="id", write_disposition="merge", table_name=lambda i: i['type']) def github_repo_events(last_created_at = dlt.sources.incremental("created_at", "1970-01-01T00:00:00Z")): """A resource taking a stream of github events and dispatching them to tables named by event type. Deduplicates be 'id'. Loads incrementally by 'created_at' """ @@ -134,7 +136,7 @@ Each record in the destination table with the same `primary_key` or `merge_key` Deletes are propagated to any child table that might exist. For each record that gets deleted in the root table, all corresponding records in the child table(s) will also be deleted. Records in parent and child tables are linked through the `root key` that is explained in the next section. #### Example: with primary key and boolean delete column -```python +```py @dlt.resource( primary_key="id", write_disposition="merge", @@ -157,11 +159,11 @@ def resource(): ``` #### Example: with merge key and non-boolean delete column -```python +```py @dlt.resource( merge_key="id", write_disposition="merge", - columns={"deleted_at_ts": {"hard_delete": True}}} + columns={"deleted_at_ts": {"hard_delete": True}}) def resource(): # this will insert two records yield [ @@ -175,11 +177,11 @@ def resource(): ``` #### Example: with primary key and "dedup_sort" hint -```python +```py @dlt.resource( primary_key="id", write_disposition="merge", - columns={"deleted_flag": {"hard_delete": True}, "lsn": {"dedup_sort": "desc"}} + columns={"deleted_flag": {"hard_delete": True}, "lsn": {"dedup_sort": "desc"}}) def resource(): # this will insert one record (the one with lsn = 3) yield [ @@ -204,7 +206,7 @@ tables. This concept is similar to foreign key which references a parent table, set. We do not enable it everywhere because it takes storage space. Nevertheless, is some cases you may want to permanently enable root key propagation. -```python +```py pipeline = dlt.pipeline( pipeline_name='facebook_insights', destination='duckdb', @@ -243,7 +245,7 @@ Once you've figured that out, `dlt` takes care of finding maximum/minimum cursor duplicates and managing the state with last values of cursor. Take a look at GitHub example below, where we request recently created issues. -```python +```py @dlt.resource(primary_key="id") def repo_issues( access_token, @@ -280,7 +282,7 @@ In the example below we incrementally load the GitHub events, where API does not let us filter for the newest events - it always returns all of them. Nevertheless, `dlt` will load only the new items, filtering out all the duplicates and past issues. -```python +```py # use naming function in table name to generate separate tables for each event @dlt.resource(primary_key="id", table_name=lambda i: i['type']) # type: ignore def repo_events( @@ -320,7 +322,7 @@ and lets you select nested and complex data (including the whole data item when Example below creates last value which is a dictionary holding a max `created_at` value for each created table name: -```python +```py def by_event_type(event): last_value = None if len(event) == 1: @@ -344,7 +346,7 @@ def get_events(last_created_at = dlt.sources.incremental("$", last_value_func=by ### Using `end_value` for backfill You can specify both initial and end dates when defining incremental loading. 
Let's go back to our Github example: -```python +```py @dlt.resource(primary_key="id") def repo_issues( access_token, @@ -365,7 +367,7 @@ Please note that when `end_date` is specified, `dlt` **will not modify the exist To define specific ranges to load, you can simply override the incremental argument in the resource, for example: -```python +```py july_issues = repo_issues( created_at=dlt.sources.incremental( initial_value='2022-07-01T00:00:00Z', end_value='2022-08-01T00:00:00Z' @@ -410,7 +412,7 @@ The github events example is exactly such case. The results are ordered on curso In the same fashion the `row_order` can be used to **optimize backfill** so we don't continue making unnecessary API requests after the end of range is reached. For example: -```python +```py @dlt.resource(primary_key="id") def tickets( zendesk_client, @@ -443,7 +445,7 @@ incremental and exit yield loop when true. The `dlt.sources.incremental` instance provides `start_out_of_range` and `end_out_of_range` attributes which are set when the resource yields an element with a higher/lower cursor value than the initial or end values. If you do not want `dlt` to stop processing automatically and instead to handle such events yourself, do not specify `row_order`: -```python +```py @dlt.transformer(primary_key="id") def tickets( zendesk_client, @@ -465,16 +467,25 @@ def tickets( ``` ::: -### Deduplication primary_key +### Deduplicate overlapping ranges with primary key -`dlt.sources.incremental` will inherit the primary key that is set on the resource. +`Incremental` **does not** deduplicate datasets like **merge** write disposition does. It however +makes sure than when another portion of data is extracted, records that were previously loaded won't be +included again. `dlt` assumes that you load a range of data, where the lower bound is inclusive (ie. greater than equal). +This makes sure that you never lose any data but will also re-acquire some rows. +For example: you have a database table with an cursor field on `updated_at` which has a day resolution, then there's a high +chance that after you extract data on a given day, still more records will be added. When you extract on the next day, you +should reacquire data from the last day to make sure all records are present, this will however create overlap with data +from previous extract. - let's you optionally set a `primary_key` that is used exclusively to +By default, content hash (a hash of `json` representation of a row) will be used to deduplicate. +This may be slow so`dlt.sources.incremental` will inherit the primary key that is set on the resource. +You can optionally set a `primary_key` that is used exclusively to deduplicate and which does not become a table hint. The same setting lets you disable the deduplication altogether when empty tuple is passed. Below we pass `primary_key` directly to `incremental` to disable deduplication. That overrides `delta` primary_key set in the resource: -```python +```py @dlt.resource(primary_key="delta") # disable the unique value check by passing () as primary key to incremental def some_data(last_timestamp=dlt.sources.incremental("item.ts", primary_key=())): @@ -487,7 +498,7 @@ def some_data(last_timestamp=dlt.sources.incremental("item.ts", primary_key=())) When resources are [created dynamically](source.md#create-resources-dynamically) it is possible to use `dlt.sources.incremental` definition as well. 
-```python +```py @dlt.source def stripe(): # declare a generator function @@ -523,7 +534,7 @@ result in `IncrementalUnboundError` exception. ### Using Airflow schedule for backfill and incremental loading When [running in Airflow task](../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md#2-modify-dag-file), you can opt-in your resource to get the `initial_value`/`start_value` and `end_value` from Airflow schedule associated with your DAG. Let's assume that **Zendesk tickets** resource contains a year of data with thousands of tickets. We want to backfill the last year of data week by week and then continue incremental loading daily. -```python +```py @dlt.resource(primary_key="id") def tickets( zendesk_client, @@ -542,7 +553,7 @@ We opt-in to Airflow scheduler by setting `allow_external_schedulers` to `True`: 2. In all other environments, the `incremental` behaves as usual, maintaining `dlt` state. Let's generate a deployment with `dlt deploy zendesk_pipeline.py airflow-composer` and customize the dag: -```python +```py @dag( schedule_interval='@weekly', start_date=pendulum.datetime(2023, 2, 1), @@ -579,7 +590,7 @@ When you enable the DAG in Airflow, it will generate several runs and start exec subsequent weekly intervals starting with `2023-02-12, 00:00:00 UTC` to `2023-02-19, 00:00:00 UTC`. You can repurpose the DAG above to start loading new data incrementally after (or during) the backfill: -```python +```py @dag( schedule_interval='@daily', start_date=pendulum.datetime(2023, 2, 1), @@ -626,7 +637,7 @@ You may force a full refresh of a `merge` and `append` pipelines: Example: -```python +```py p = dlt.pipeline(destination="bigquery", dataset_name="dataset_name") # do a full refresh p.run(merge_source(), write_disposition="replace") @@ -657,7 +668,7 @@ is loaded, the yielded resource data will be loaded at the same time with the up In the two examples below you see how the `dlt.sources.incremental` is working under the hood. -```python +```py @resource() def tweets(): # Get a last value from loaded metadata. If not exist, get None @@ -672,7 +683,7 @@ def tweets(): If we keep a list or a dictionary in the state, we can modify the underlying values in the objects, and thus we do not need to set the state back explicitly. -```python +```py @resource() def tweets(): # Get a last value from loaded metadata. 
If not exist, get None @@ -710,7 +721,7 @@ data twice - even if the user makes a mistake and requests the same months range In the following example, we initialize a variable with an empty list as a default: -```python +```py @dlt.resource(write_disposition="append") def players_games(chess_url, players, start_month=None, end_month=None): loaded_archives_cache = dlt.current.resource_state().setdefault("archives", []) @@ -736,7 +747,7 @@ def players_games(chess_url, players, start_month=None, end_month=None): ### Advanced state usage: tracking the last value for all search terms in Twitter API -```python +```py @dlt.resource(write_disposition="append") def search_tweets(twitter_bearer_token=dlt.secrets.value, search_terms=None, start_time=None, end_time=None, last_value=None): headers = _headers(twitter_bearer_token) diff --git a/docs/website/docs/general-usage/pipeline.md b/docs/website/docs/general-usage/pipeline.md index 095e03e96d..53eca2e59a 100644 --- a/docs/website/docs/general-usage/pipeline.md +++ b/docs/website/docs/general-usage/pipeline.md @@ -15,7 +15,7 @@ Example: This pipeline will load a list of objects into `duckdb` table with a name "three": -```python +```py import dlt pipeline = dlt.pipeline(destination="duckdb", dataset_name="sequence") @@ -53,7 +53,7 @@ Arguments: Example: This pipeline will load the data the generator `generate_rows(10)` produces: -```python +```py import dlt def generate_rows(nr): @@ -110,7 +110,7 @@ pipeline run is progressing. `dlt` supports 4 progress monitors out of the box: You pass the progress monitor in `progress` argument of the pipeline. You can use a name from the list above as in the following example: -```python +```py # create a pipeline loading chess data that dumps # progress to stdout each 10 seconds (the default) pipeline = dlt.pipeline( @@ -123,7 +123,7 @@ pipeline = dlt.pipeline( You can fully configure the progress monitor. See two examples below: -```python +```py # log each minute to Airflow task logger ti = get_current_context()["ti"] pipeline = dlt.pipeline( @@ -134,7 +134,7 @@ pipeline = dlt.pipeline( ) ``` -```python +```py # set tqdm bar color to yellow pipeline = dlt.pipeline( pipeline_name="chess_pipeline", diff --git a/docs/website/docs/general-usage/resource.md b/docs/website/docs/general-usage/resource.md index 9b8d45982d..e2e95d937f 100644 --- a/docs/website/docs/general-usage/resource.md +++ b/docs/website/docs/general-usage/resource.md @@ -19,7 +19,7 @@ Commonly used arguments: Example: -```python +```py @dlt.resource(name='table_name', write_disposition='replace') def generate_rows(): for i in range(10): @@ -32,7 +32,7 @@ def source_name(): To get the data of a resource, we could do: -```python +```py for row in generate_rows(): print(row) @@ -57,7 +57,7 @@ accepts following arguments: `dlt` that column `tags` (containing a list of tags) in `user` table should have type `complex` which means that it will be loaded as JSON/struct and not as child table. - ```python + ```py @dlt.resource(name="user", columns={"tags": {"data_type": "complex"}}) def get_users(): ... @@ -82,7 +82,7 @@ You can alternatively use a [Pydantic](https://pydantic-docs.helpmanual.io/) mod For example: -```python +```py from pydantic import BaseModel @@ -119,7 +119,7 @@ Things to note: You can override this by configuring the Pydantic model -```python +```py from typing import ClassVar from dlt.common.libs.pydantic import DltConfig @@ -146,7 +146,7 @@ argument and the `table_name` string as a return value. 
For example, a resource that loads GitHub repository events wants to send `issue`, `pull request`, and `comment` events to separate tables. The type of the event is in the "type" field. -```python +```py # send item to a table with name item["type"] @dlt.resource(table_name=lambda event: event['type']) def repo_events() -> Iterator[TDataItems]: @@ -154,13 +154,13 @@ def repo_events() -> Iterator[TDataItems]: # the `table_schema` method gets table schema generated by a resource and takes optional # data item to evaluate dynamic hints -print(repo_events().table_schema({"type": "WatchEvent", id=...})) +print(repo_events().table_schema({"type": "WatchEvent", id:...})) ``` In more advanced cases, you can dispatch data to different tables directly in the code of the resource function: -```python +```py @dlt.resource def repo_events() -> Iterator[TDataItems]: # mark the "item" to be sent to table with name item["type"] @@ -172,7 +172,7 @@ def repo_events() -> Iterator[TDataItems]: You can add arguments to your resource functions like to any other. Below we parametrize our `generate_rows` resource to generate the number of rows we request: -```python +```py @dlt.resource(name='table_name', write_disposition='replace') def generate_rows(nr): for i in range(nr): @@ -195,7 +195,7 @@ that returns a list of objects (i.e. users) in one endpoint and user details in with this by declaring a resource that obtains a list of users and another resource that receives items from the list and downloads the profiles. -```python +```py @dlt.resource(write_disposition="replace") def users(limit=None): for u in _get_users(limit): @@ -215,7 +215,7 @@ pipeline.run(user_details) ``` In the example above, `user_details` will receive data from default instance of `users` resource (with `limit` set to `None`). You can also use **pipe |** operator to bind resources dynamically -```python +```py # you can be more explicit and use a pipe operator. # with it you can create dynamic pipelines where the dependencies # are set at run time and resources are parametrized i.e. @@ -225,7 +225,7 @@ pipeline.run(users(limit=100) | user_details) :::tip Transformers are allowed not only to **yield** but also to **return** values and can decorate **async** functions and [**async generators**](../reference/performance.md#extract). Below we decorate an async function and request details on two pokemons. Http calls are made in parallel via httpx library. -```python +```py import dlt import httpx @@ -245,7 +245,7 @@ print(list([1,2] | pokemon())) A standalone resource is defined on a function that is top level in a module (not inner function) that accepts config and secrets values. Additionally if `standalone` flag is specified, the decorated function signature and docstring will be preserved. `dlt.resource` will just wrap the decorated function and user must call the wrapper to get the actual resource. Below we declare a `filesystem` resource that must be called before use. -```python +```py @dlt.resource(standalone=True) def filesystem(bucket_url=dlt.config.value): """list and yield files in `bucket_url`""" @@ -256,7 +256,7 @@ pipeline.run(filesystem("s3://my-bucket/reports"), table_name="reports") ``` Standalone may have dynamic name that depends on the arguments passed to the decorated function. For example:: -```python +```py @dlt.resource(standalone=True, name=lambda args: args["stream_name"]) def kinesis(stream_name: str): ... @@ -271,7 +271,7 @@ You can extract multiple resources in parallel threads or with async IO. 
To enable this for a sync resource you can set the `parallelized` flag to `True` in the resource decorator: -```python +```py @dlt.resource(parallelized=True) def get_users(): for u in _get_users(): @@ -288,7 +288,7 @@ pipeline.run(get_users(), get_orders()) Async generators are automatically extracted concurrently with other resources: -```python +```py @dlt.resource async def get_users(): async for u in _get_users(): # Assuming _get_users is an async generator @@ -317,7 +317,7 @@ so: Here's our resource: -```python +```py import dlt @dlt.resource(write_disposition="replace") @@ -330,7 +330,7 @@ def users(): Here's our script that defines transformations and loads the data: -```python +```py from pipedrive import users def anonymize_user(user_data): @@ -351,7 +351,7 @@ example data and test your transformations etc. In order to do that, you limit h be yielded by a resource by calling `resource.add_limit` method. In the example below we load just 10 first items from and infinite counter - that would otherwise never end. -```python +```py r = dlt.resource(itertools.count(), name="infinity").add_limit(10) assert list(r) == list(range(10)) ``` @@ -375,7 +375,7 @@ that will keep just one updated record per `user_id`. It also adds ["last value" incremental loading](incremental-loading.md#incremental_loading-with-last-value) on `created_at` column to prevent requesting again the already loaded records: -```python +```py tables = sql_database() tables.users.apply_hints( write_disposition="merge", @@ -386,7 +386,7 @@ pipeline.run(tables) ``` To just change a name of a table to which resource will load data, do the following: -```python +```py tables = sql_database() tables.users.table_name = "other_users" ``` @@ -398,7 +398,7 @@ with the existing schema in the same way `apply_hints` method above works. There should avoid lengthy operations (ie. reflecting database tables) during creation of the DAG so it is better do do it when DAG executes. You may also emit partial hints (ie. precision and scale for decimal types) for column to help `dlt` type inference. -```python +```py @dlt.resource def sql_table(credentials, schema, table): # create sql alchemy engine @@ -432,7 +432,7 @@ You can emit columns as Pydantic model and use dynamic hints (ie. lambda for tab ### Duplicate and rename resources There are cases when you your resources are generic (ie. bucket filesystem) and you want to load several instances of it (ie. files from different folders) to separate tables. In example below we use `filesystem` source to load csvs from two different folders into separate tables: -```python +```py @dlt.resource(standalone=True) def filesystem(bucket_url): # list and yield files in bucket_url @@ -463,7 +463,7 @@ You can pass individual resources or list of resources to the `dlt.pipeline` obj loaded outside the source context, will be added to the [default schema](schema.md) of the pipeline. -```python +```py @dlt.resource(name='table_name', write_disposition='replace') def generate_rows(nr): for i in range(nr): @@ -485,6 +485,6 @@ To do a full refresh of an `append` or `merge` resources you temporarily change disposition to replace. 
You can use `apply_hints` method of a resource or just provide alternative write disposition when loading: -```python +```py p.run(merge_source(), write_disposition="replace") ``` diff --git a/docs/website/docs/general-usage/schema-contracts.md b/docs/website/docs/general-usage/schema-contracts.md index 764b565beb..1b5e67357a 100644 --- a/docs/website/docs/general-usage/schema-contracts.md +++ b/docs/website/docs/general-usage/schema-contracts.md @@ -49,7 +49,7 @@ The `schema_contract` argument accepts two forms: 2. **shorthand** a contract mode (string) that will be applied to all schema entities. For example setting `schema_contract` to *freeze* will expand to the full form: -```python +```py {"tables": "freeze", "columns": "freeze", "data_type": "freeze"} ``` @@ -65,7 +65,7 @@ You can change the contract on the **source** instance via `schema_contract` pro Pydantic models can be used to [define table schemas and validate incoming data](resource.md#define-a-schema-with-pydantic). You can use any model you already have. `dlt` will internally synthesize (if necessary) new models that conform with the **schema contract** on the resource. Just passing a model in `column` argument of the [dlt.resource](resource.md#define-a-schema-with-pydantic) sets a schema contract that conforms to default Pydantic behavior: -```python +```py { "tables": "evolve", "columns": "discard_value", @@ -121,10 +121,10 @@ Here's how `dlt` deals with column modes: When contract is violated in freeze mode, `dlt` raises `DataValidationError` exception. This exception gives access to the full context and passes the evidence to the caller. As with any other exception coming from pipeline run, it will be re-raised via `PipelineStepFailed` exception which you should catch in except: -```python +```py try: pipeline.run() -except as pip_ex: +except Exception as pip_ex: if pip_ex.step == "normalize": if isinstance(pip_ex.__context__.__context__, DataValidationError): ... @@ -195,7 +195,7 @@ def items(): def other_items(): ... -@dlt.source(schema_contract={"columns": "freeze", "data_type": "freeze"}): +@dlt.source(schema_contract={"columns": "freeze", "data_type": "freeze"}) def source(): return [items(), other_items()] diff --git a/docs/website/docs/general-usage/schema-evolution.md b/docs/website/docs/general-usage/schema-evolution.md index 71a6b66521..377df0e47f 100644 --- a/docs/website/docs/general-usage/schema-evolution.md +++ b/docs/website/docs/general-usage/schema-evolution.md @@ -7,14 +7,14 @@ keywords: [schema evolution, schema, dlt schema] # Schema evolution ## When to use schema evolution? -Schema evolution is a best practice when ingesting most data. It’s simply a way to get data across a format barrier. +Schema evolution is a best practice when ingesting most data. It’s simply a way to get data across a format barrier. It separates the technical challenge of “loading” data, from the business challenge of “curating” data. This enables us to have pipelines that are maintainable by different individuals at different stages. However, for cases where schema evolution might be triggered by malicious events, such as in web tracking, data contracts are advised. Read more about how to implement data contracts [here](https://dlthub.com/docs/general-usage/schema-contracts). ## Schema evolution with `dlt` -`dlt` automatically infers the initial schema for your first pipeline run. However, in most cases, the schema tends to change over time, which makes it critical for downstream consumers to adapt to schema changes. 
+`dlt` automatically infers the initial schema for your first pipeline run. However, in most cases, the schema tends to change over time, which makes it critical for downstream consumers to adapt to schema changes. As the structure of data changes, such as the addition of new columns, changing data types, etc., `dlt` handles these schema changes, enabling you to adapt to changes without losing velocity. @@ -23,11 +23,11 @@ The first run of a pipeline will scan the data that goes through it and generate We’ll review some examples here and figure out how `dlt` creates initial schema and how normalisation works. Consider a pipeline that loads the following schema: -```python +```py data = [{ "organization": "Tech Innovations Inc.", "address": { - 'building': 'r&d', + 'building': 'r&d', "room": 7890, }, "Inventory": [ @@ -62,22 +62,22 @@ Let’s add the following 4 cases: - A column is renamed: a field “building” was renamed to “main_block”. Please update the pipeline for the cases discussed above. -```python +```py data = [{ "organization": "Tech Innovations Inc.", # Column added: - "CEO": "Alice Smith", + "CEO": "Alice Smith", "address": { # 'building' renamed to 'main_block' - 'main_block': 'r&d', + 'main_block': 'r&d', # Removed room column - # "room": 7890, + # "room": 7890, }, "Inventory": [ # Type change: 'inventory_nr' changed to string from int - {"name": "Plasma ray", "inventory nr": "AR2411"}, - {"name": "Self-aware Roomba", "inventory nr": "AR268"}, - {"name": "Type-inferrer", "inventory nr": "AR3621"} + {"name": "Plasma ray", "inventory nr": "AR2411"}, + {"name": "Self-aware Roomba", "inventory nr": "AR268"}, + {"name": "Type-inferrer", "inventory nr": "AR3621"} ] }] @@ -110,7 +110,7 @@ The column lineage can be tracked by loading the 'load_info' to the destination. **Getting notifications** We can read the load outcome and send it to slack webhook with `dlt`. -```python +```py # Import the send_slack_message function from the dlt library from dlt.common.runtime.slack import send_slack_message @@ -123,7 +123,7 @@ for package in info.load_packages: for table_name, table in package.schema_update.items(): # Iterate over each column in the current table for column_name, column in table["columns"].items(): - # Send a message to the Slack channel with the table + # Send a message to the Slack channel with the table # and column update information send_slack_message( hook, @@ -142,16 +142,16 @@ This script sends Slack notifications for schema updates using the `send_slack_m ### How to test for removed columns - applying “not null” constraint -A column not existing, and a column being null, are two different things. However, when it comes to APIs and json, it’s usually all treated the same - the key-value pair will simply not exist. +A column not existing, and a column being null, are two different things. However, when it comes to APIs and json, it’s usually all treated the same - the key-value pair will simply not exist. To remove a column, exclude it from the output of the resource function. Subsequent data inserts will treat this column as null. Verify column removal by applying a not null constraint. For instance, after removing the "room" column, apply a not null constraint to confirm its exclusion. 
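A minimal sketch of that verification step, not taken from the changed files, could look like the snippet below. It assumes the removed nested field ends up as a flattened column named `address__room`, that the data is yielded by a hypothetical `org_data` resource loaded into duckdb, and that a `nullable: False` column hint is enough to make the load fail once the column only receives nulls:

```py
import dlt

@dlt.resource(
    name="org",
    write_disposition="append",
    # assumed flattened column name; declaring it non-nullable should make a null-only load fail
    columns={"address__room": {"data_type": "bigint", "nullable": False}},
)
def org_data():
    # "room" is intentionally no longer part of the payload
    yield {
        "organization": "Tech Innovations Inc.",
        "address": {"main_block": "r&d"},
    }

pipeline = dlt.pipeline(
    pipeline_name="schema_evolution_check",
    destination="duckdb",
    dataset_name="org_data",
)
# the load is expected to fail because "address__room" is required but never provided
load_info = pipeline.run(org_data())
```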
-```python +```py data = [{ "organization": "Tech Innovations Inc.", "address": { - 'building': 'r&d' + 'building': 'r&d' #"room": 7890, }, "Inventory": [ @@ -171,20 +171,20 @@ During pipeline execution a data validation error indicates that a removed colum The data in the pipeline mentioned above is modified. -```python +```py data = [{ "organization": "Tech Innovations Inc.", "CEO": "Alice Smith", "address": {'main_block': 'r&d'}, "Inventory": [ - {"name": "Plasma ray", "inventory nr": "AR2411"}, - {"name": "Self-aware Roomba", "inventory nr": "AR268"}, + {"name": "Plasma ray", "inventory nr": "AR2411"}, + {"name": "Self-aware Roomba", "inventory nr": "AR268"}, { "name": "Type-inferrer", "inventory nr": "AR3621", "details": { - "category": "Computing Devices", + "category": "Computing Devices", "id": 369, - "specifications": [{ + "specifications": [{ "processor": "Quantum Core", "memory": "512PB" }] @@ -201,7 +201,7 @@ The schema of the data above is loaded to the destination as follows: ## What did the schema evolution engine do? -The schema evolution engine in the `dlt` library is designed to handle changes in the structure of your data over time. For example: +The schema evolution engine in the `dlt` library is designed to handle changes in the structure of your data over time. For example: - As above in continuation of the inferred schema, the “specifications” are nested in "details”, which are nested in “Inventory”, all under table name “org”. So the table created for projects is `org__inventory__details__specifications`. @@ -209,6 +209,6 @@ These is a simple examples of how schema evolution works. ## Schema evolution using schema and data contracts -Demonstrating schema evolution without talking about schema and data contracts is only one side of the coin. Schema and data contracts dictate the terms of how the schema being written to destination should evolve. +Demonstrating schema evolution without talking about schema and data contracts is only one side of the coin. Schema and data contracts dictate the terms of how the schema being written to destination should evolve. Schema and data contracts can be applied to entities ‘tables’ , ‘columns’ and ‘data_types’ using contract modes ‘evolve’, freeze’, ‘discard_rows’ and ‘discard_columns’ to tell `dlt` how to apply contract for a particular entity. To read more about **schema and data contracts** read our [documentation](https://dlthub.com/docs/general-usage/schema-contracts). \ No newline at end of file diff --git a/docs/website/docs/general-usage/schema.md b/docs/website/docs/general-usage/schema.md index 7ce1d959c9..164814010d 100644 --- a/docs/website/docs/general-usage/schema.md +++ b/docs/website/docs/general-usage/schema.md @@ -149,7 +149,7 @@ Now imagine the data has changed and `id` field also contains strings ```py data = [ - {"id": 1, "human_name": "Alice"} + {"id": 1, "human_name": "Alice"}, {"id": "idx-nr-456", "human_name": "Bob"} ] ``` @@ -308,7 +308,7 @@ schema available via `dlt.current.source_schema()`. Example: -```python +```py @dlt.source def textual(nesting_level: int): # get the source schema from the `current` context diff --git a/docs/website/docs/general-usage/source.md b/docs/website/docs/general-usage/source.md index 1b3d1ce0cc..bcdd137dce 100644 --- a/docs/website/docs/general-usage/source.md +++ b/docs/website/docs/general-usage/source.md @@ -26,7 +26,7 @@ You declare source by decorating an (optionally async) function that return or y You can create resources by using `dlt.resource` as a function. 
In an example below we reuse a single generator function to create a list of resources for several Hubspot endpoints. -```python +```py @dlt.source def hubspot(api_key=dlt.secrets.value): @@ -59,7 +59,7 @@ If this is impractical (for example you want to reflect a database to create res You can access resources present in a source and select which of them you want to load. In case of `hubspot` resource above we could select and load "companies", "deals" and "products" resources: -```python +```py from hubspot import hubspot source = hubspot() @@ -73,7 +73,7 @@ pipeline.run(source.with_resources("companies", "deals")) Resources can be individually accessed and selected: -```python +```py # resources are accessible as attributes of a source for c in source.companies: # enumerate all data in companies resource print(c) @@ -89,7 +89,7 @@ source.deals.selected = False You can modify and filter data in resources, for example if we want to keep only deals after certain date: -```python +```py source.deals.add_filter(lambda deal: deal["created_at"] > yesterday) ``` @@ -103,7 +103,7 @@ You can easily get your test dataset in a few minutes, when otherwise you'd need the full loading to complete. Below we limit the `pipedrive` source to just get 10 pages of data from each endpoint. Mind that the transformers will be evaluated fully: -```python +```py from pipedrive import pipedrive_source pipeline = dlt.pipeline(pipeline_name='pipedrive', destination='duckdb', dataset_name='pipedrive_data') @@ -121,7 +121,7 @@ declare a new [transformer that takes the data from](resource.md#feeding-data-from-one-resource-into-another) `deals` resource and add it to the source. -```python +```py import dlt from hubspot import hubspot @@ -140,11 +140,11 @@ source.resources.add(source.deals | deal_scores) pipeline.run(source) ``` You can also set the resources in the source as follows -```python +```py source.deal_scores = source.deals | deal_scores ``` or -```python +```py source.resources["deal_scores"] = source.deals | deal_scores ``` :::note @@ -156,7 +156,7 @@ When adding resource to the source, `dlt` clones the resource so your existing i You can limit how deep `dlt` goes when generating child tables. By default, the library will descend and generate child tables for all nested lists, without limit. -```python +```py @dlt.source(max_table_nesting=1) def mongo_db(): ... @@ -172,7 +172,7 @@ tables of child tables). Typical settings: You can achieve the same effect after the source instance is created: -```python +```py from mongo_db import mongo_db source = mongo_db() @@ -202,7 +202,7 @@ You are also free to decompose a single source into several ones. For example, y down a 50 table copy job into an airflow dag with high parallelism to load the data faster. 
To do so, you could get the list of resources as: -```python +```py # get a list of resources' names resource_list = sql_source().resources.keys() @@ -216,12 +216,12 @@ for res in resource_list: You can temporarily change the "write disposition" to `replace` on all (or selected) resources within a source to force a full refresh: -```python +```py p.run(merge_source(), write_disposition="replace") ``` With selected resources: -```python +```py p.run(tables.with_resources("users"), write_disposition="replace") ``` diff --git a/docs/website/docs/general-usage/state.md b/docs/website/docs/general-usage/state.md index 23625db27c..0ab2b8a658 100644 --- a/docs/website/docs/general-usage/state.md +++ b/docs/website/docs/general-usage/state.md @@ -15,7 +15,7 @@ You read and write the state in your resources. Below we use the state to create game archives which we then use to [prevent requesting duplicates](incremental-loading.md#advanced-state-usage-storing-a-list-of-processed-entities). -```python +```py @dlt.resource(write_disposition="append") def players_games(chess_url, player, start_month=None, end_month=None): # create or request a list of archives from resource scoped state diff --git a/docs/website/docs/getting-started.md b/docs/website/docs/getting-started.md index cd121b0ad5..ecaa78c949 100644 --- a/docs/website/docs/getting-started.md +++ b/docs/website/docs/getting-started.md @@ -20,13 +20,13 @@ Let's get started! Install dlt using `pip`: -```bash +```sh pip install -U dlt ``` The command above installs (or upgrades) the library core, in the example below we use DuckDB as a destination so let's add a `duckdb` dependency: -```bash +```sh pip install "dlt[duckdb]" ``` @@ -63,13 +63,13 @@ When you look at the code above, you can see that we: Save this Python script with the name `quick_start_pipeline.py` and run the following command: -```bash +```sh python quick_start_pipeline.py ``` The output should look like: -```bash +```sh Pipeline quick_start completed in 0.59 seconds 1 load package(s) were loaded to destination duckdb and into dataset mydata The duckdb destination used duckdb:////home/user-name/quick_start/quick_start.duckdb location to store data @@ -82,13 +82,13 @@ Load package 1692364844.460054 is LOADED and contains no failed jobs To allow sneak peek and basic discovery you can take advantage of [built-in integration with Strealmit](reference/command-line-interface#show-tables-and-data-in-the-destination): -```bash +```sh dlt pipeline quick_start show ``` **quick_start** is the name of the pipeline from the script above. If you do not have Streamlit installed yet do: -```bash +```sh pip install streamlit ``` diff --git a/docs/website/docs/intro.md b/docs/website/docs/intro.md index 6df0dad82d..d6d823ad47 100644 --- a/docs/website/docs/intro.md +++ b/docs/website/docs/intro.md @@ -17,9 +17,9 @@ from various and often messy data sources into well-structured, live datasets. T ```sh pip install dlt ``` -Unlike other solutions, with dlt, there's no need to use any backends or containers. Simply import `dlt` in a Python file or a Jupyter Notebook cell, and create a pipeline to load data into any of the [supported destinations](dlt-ecosystem/destinations/). You can load data from any source that produces Python data structures, including APIs, files, databases, and more. +Unlike other solutions, with dlt, there's no need to use any backends or containers. 
Simply import `dlt` in a Python file or a Jupyter Notebook cell, and create a pipeline to load data into any of the [supported destinations](dlt-ecosystem/destinations/). You can load data from any source that produces Python data structures, including APIs, files, databases, and more. `dlt` also supports building a [custom destination](dlt-ecosystem/destinations/destination.md), which you can use as reverse ETL. -The library will create or update tables, infer data types and handle nested data automatically. Here are a few example pipelines: +The library will create or update tables, infer data types, and handle nested data automatically. Here are a few example pipelines: ``` This command creates new dlt pipeline script that loads data from `source` to `destination` to it. When you run the command: @@ -26,7 +26,7 @@ version if run again with existing `source` name. You are warned if files will b You can use `--location ` option to specify your own repository with sources. Typically you would [fork ours](https://github.com/dlt-hub/verified-sources) and start customizing and adding sources ie. to use them for your team or organization. You can also specify a branch with `--branch ` ie. to test a version being developed. ### List all verified sources -```shell +```sh dlt init --list-verified-sources ``` Shows all available verified sources and their short descriptions. For each source, checks if your local `dlt` version requires update @@ -43,7 +43,7 @@ that will add additional packages to current environment. ### github-action -```shell +```sh dlt deploy