From d2e504533c14fc133c5c941a928f4a51bd92a48c Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Thu, 26 Sep 2024 18:17:18 +0200 Subject: [PATCH 01/29] shows sqlalchemy docs --- docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md | 2 +- docs/website/sidebars.js | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md b/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md index a3b19377da..b9014e0564 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md +++ b/docs/website/docs/dlt-ecosystem/destinations/sqlalchemy.md @@ -1,5 +1,5 @@ --- -title: SQL databases (powered by SQLAlchemy) +title: 30+ SQL databases (powered by SQLAlchemy) description: SQLAlchemy destination keywords: [sql, sqlalchemy, database, destination] --- diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index 23c8d192ba..7e6000a2ca 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -171,6 +171,7 @@ const sidebars = { 'dlt-ecosystem/destinations/redshift', 'dlt-ecosystem/destinations/snowflake', 'dlt-ecosystem/destinations/athena', + 'dlt-ecosystem/destinations/sqlalchemy', 'dlt-ecosystem/destinations/weaviate', 'dlt-ecosystem/destinations/lancedb', 'dlt-ecosystem/destinations/qdrant', From 873f6befe99e833c5c3e9e590885a5457eb73c1e Mon Sep 17 00:00:00 2001 From: David Scharf Date: Fri, 27 Sep 2024 11:10:45 +0200 Subject: [PATCH 02/29] Fix config sections for synching destinations and accessing destination clients (#1887) * add config section for getting pipeline clients * add config section for sync_destination * prefers existing sections in pipeline --------- Co-authored-by: Marcin Rudolf --- dlt/pipeline/pipeline.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index fa10f5ac89..54e576b5fc 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -257,13 +257,17 @@ def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: return decorator -def with_config_section(sections: Tuple[str, ...]) -> Callable[[TFun], TFun]: +def with_config_section( + sections: Tuple[str, ...], merge_func: ConfigSectionContext.TMergeFunc = None +) -> Callable[[TFun], TFun]: def decorator(f: TFun) -> TFun: @wraps(f) def _wrap(self: "Pipeline", *args: Any, **kwargs: Any) -> Any: # add section context to the container to be used by all configuration without explicit sections resolution with inject_section( - ConfigSectionContext(pipeline_name=self.pipeline_name, sections=sections) + ConfigSectionContext( + pipeline_name=self.pipeline_name, sections=sections, merge_style=merge_func + ) ): return f(self, *args, **kwargs) @@ -678,7 +682,7 @@ def run( and not self._state_restored and (self.destination or destination) ): - self.sync_destination(destination, staging, dataset_name) + self._sync_destination(destination, staging, dataset_name) # sync only once self._state_restored = True # normalize and load pending data @@ -712,7 +716,7 @@ def run( else: return None - @with_schemas_sync + @with_config_section(sections=None, merge_func=ConfigSectionContext.prefer_existing) def sync_destination( self, destination: TDestinationReferenceArg = None, @@ -730,6 +734,17 @@ def sync_destination( Note: this method is executed by the `run` method before any operation on data. Use `restore_from_destination` configuration option to disable that behavior. 
""" + return self._sync_destination( + destination=destination, staging=staging, dataset_name=dataset_name + ) + + @with_schemas_sync + def _sync_destination( + self, + destination: TDestinationReferenceArg = None, + staging: TDestinationReferenceArg = None, + dataset_name: str = None, + ) -> None: self._set_destinations(destination=destination, staging=staging) self._set_dataset_name(dataset_name) @@ -969,6 +984,7 @@ def get_local_state_val(self, key: str) -> Any: state = self._get_state() return state["_local"][key] # type: ignore + @with_config_section(sections=None, merge_func=ConfigSectionContext.prefer_existing) def sql_client(self, schema_name: str = None) -> SqlClientBase[Any]: """Returns a sql client configured to query/change the destination and dataset that were used to load the data. Use the client with `with` statement to manage opening and closing connection to the destination: @@ -1008,6 +1024,7 @@ def _fs_client(self, schema_name: str = None) -> FSClientBase: return client raise FSClientNotAvailable(self.pipeline_name, self.destination.destination_name) + @with_config_section(sections=None, merge_func=ConfigSectionContext.prefer_existing) def destination_client(self, schema_name: str = None) -> JobClientBase: """Get the destination job client for the configured destination Use the client with `with` statement to manage opening and closing connection to the destination: From 5bbf0192c8bd79c1eeb12cd02c4633957a123886 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Willi=20M=C3=BCller?= Date: Fri, 27 Sep 2024 22:28:37 +0530 Subject: [PATCH 03/29] refactors Session mocking in tests to mocker.patch and mocker.spy API (#1891) --- tests/sources/helpers/rest_client/test_client.py | 13 +++---------- tests/sources/rest_api/integration/test_offline.py | 6 +----- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/tests/sources/helpers/rest_client/test_client.py b/tests/sources/helpers/rest_client/test_client.py index 5ec48e2972..488d7ef525 100644 --- a/tests/sources/helpers/rest_client/test_client.py +++ b/tests/sources/helpers/rest_client/test_client.py @@ -444,29 +444,22 @@ def test_configurable_timeout(self, mocker) -> None: import requests - original_send = requests.Session.send - requests.Session.send = mocker.Mock() # type: ignore[method-assign] + mocked_send = mocker.patch.object(requests.Session, "send") rest_client.get("/posts/1") - assert requests.Session.send.call_args[1] == { # type: ignore[attr-defined] + assert mocked_send.call_args[1] == { "timeout": 42, "proxies": ANY, "stream": ANY, "verify": ANY, "cert": ANY, } - # restore, otherwise side-effect on subsequent tests - requests.Session.send = original_send # type: ignore[method-assign] def test_request_kwargs(self, mocker) -> None: - def send_spy(*args, **kwargs): - return original_send(*args, **kwargs) - rest_client = RESTClient( base_url="https://api.example.com", session=Client().session, ) - original_send = rest_client.session.send - mocked_send = mocker.patch.object(rest_client.session, "send", side_effect=send_spy) + mocked_send = mocker.spy(rest_client.session, "send") rest_client.get( path="/posts/1", diff --git a/tests/sources/rest_api/integration/test_offline.py b/tests/sources/rest_api/integration/test_offline.py index 57cffc99d0..cb91e0d680 100644 --- a/tests/sources/rest_api/integration/test_offline.py +++ b/tests/sources/rest_api/integration/test_offline.py @@ -373,12 +373,8 @@ def test_multiple_response_actions_on_every_response(mock_api_server, mocker): class CustomSession(Session): pass - def 
send_spy(*args, **kwargs): - return original_send(*args, **kwargs) - my_session = CustomSession() - original_send = my_session.send - mocked_send = mocker.patch.object(my_session, "send", side_effect=send_spy) + mocked_send = mocker.spy(my_session, "send") source = rest_api_source( { From 3f1938aa0e260b2d859650a9670553e64719e7c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Willi=20M=C3=BCller?= Date: Sun, 29 Sep 2024 23:05:55 +0530 Subject: [PATCH 04/29] corrects test suite (#1893) --- tests/cli/test_init_command.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cli/test_init_command.py b/tests/cli/test_init_command.py index e85c4593f6..f76dc2f053 100644 --- a/tests/cli/test_init_command.py +++ b/tests/cli/test_init_command.py @@ -63,7 +63,7 @@ TEMPLATES = ["debug", "default", "arrow", "requests", "dataframe", "intro"] # a few verified sources we know to exist -SOME_KNOWN_VERIFIED_SOURCES = ["chess", "sql_database", "google_sheets", "pipedrive"] +SOME_KNOWN_VERIFIED_SOURCES = ["chess", "google_sheets", "pipedrive"] def get_verified_source_candidates(repo_dir: str) -> List[str]: @@ -150,7 +150,7 @@ def check_results(items: Dict[str, SourceConfiguration]) -> None: check_results(core_sources) verified_sources = _list_verified_sources(DEFAULT_VERIFIED_SOURCES_REPO) - assert set(SOME_KNOWN_VERIFIED_SOURCES).issubset(verified_sources) + assert set(SOME_KNOWN_VERIFIED_SOURCES).issubset(verified_sources.keys()) check_results(verified_sources) assert len(verified_sources.keys()) > 10 From cb450466346761e7da0e7f67666e0933654a7b8e Mon Sep 17 00:00:00 2001 From: Jorrit Sandbrink <47451109+jorritsandbrink@users.noreply.github.com> Date: Sun, 29 Sep 2024 23:48:41 +0400 Subject: [PATCH 05/29] incremental `scd2` with `merge_key` (#1818) * remove unused imports * add scd2 retire_if_absent option * rewrite scd2 retire logic * include new keys in typing * finetune scd2 typing * update typeddict validation test * rename to retire_absent_rows * add reinsert test case * replace natural_key with merge_key * rewrite natural key presence check * simplify scd2 test and remove redundancy * set constants once * document incremental scd2 * remove natural_key remnants * remove `retire_absent_rows` flag * add scd2 merge key partition test * fix typos * update incremental scd2 docs --- dlt/common/schema/typing.py | 9 +- dlt/destinations/impl/athena/athena.py | 6 + dlt/destinations/sql_jobs.py | 34 +- dlt/extract/hints.py | 38 +- .../docs/general-usage/incremental-loading.md | 127 +++++- tests/common/test_validation.py | 8 +- tests/load/pipeline/test_scd2.py | 376 +++++++++++++----- 7 files changed, 462 insertions(+), 136 deletions(-) diff --git a/dlt/common/schema/typing.py b/dlt/common/schema/typing.py index 2247358331..7174d1b5c7 100644 --- a/dlt/common/schema/typing.py +++ b/dlt/common/schema/typing.py @@ -232,15 +232,20 @@ class TWriteDispositionDict(TypedDict): disposition: TWriteDisposition -class TMergeDispositionDict(TWriteDispositionDict, total=False): +class TMergeDispositionDict(TWriteDispositionDict): strategy: Optional[TLoaderMergeStrategy] + + +class TScd2StrategyDict(TMergeDispositionDict, total=False): validity_column_names: Optional[List[str]] active_record_timestamp: Optional[TAnyDateTime] boundary_timestamp: Optional[TAnyDateTime] row_version_column_name: Optional[str] -TWriteDispositionConfig = Union[TWriteDisposition, TWriteDispositionDict, TMergeDispositionDict] +TWriteDispositionConfig = Union[ + TWriteDisposition, TWriteDispositionDict, TMergeDispositionDict, 
TScd2StrategyDict +] class _TTableSchemaBase(TTableProcessingHints, total=False): diff --git a/dlt/destinations/impl/athena/athena.py b/dlt/destinations/impl/athena/athena.py index 04078dd510..72611a9568 100644 --- a/dlt/destinations/impl/athena/athena.py +++ b/dlt/destinations/impl/athena/athena.py @@ -149,6 +149,12 @@ def gen_delete_temp_table_sql( sql.insert(0, f"""DROP TABLE IF EXISTS {temp_table_name.replace('"', '`')};""") return sql, temp_table_name + @classmethod + def gen_concat_sql(cls, columns: Sequence[str]) -> str: + # Athena requires explicit casting + columns = [f"CAST({c} AS VARCHAR)" for c in columns] + return f"CONCAT({', '.join(columns)})" + @classmethod def requires_temp_table_for_delete(cls) -> bool: return True diff --git a/dlt/destinations/sql_jobs.py b/dlt/destinations/sql_jobs.py index 2407d2db62..ae27213a7c 100644 --- a/dlt/destinations/sql_jobs.py +++ b/dlt/destinations/sql_jobs.py @@ -339,6 +339,10 @@ def gen_delete_from_sql( ); """ + @classmethod + def gen_concat_sql(cls, columns: Sequence[str]) -> str: + return f"CONCAT({', '.join(columns)})" + @classmethod def _shorten_table_name(cls, ident: str, sql_client: SqlClientBase[Any]) -> str: """Trims identifier to max length supported by sql_client. Used for dynamically constructed table names""" @@ -755,19 +759,35 @@ def gen_scd2_sql( active_record_timestamp = get_active_record_timestamp(root_table) if active_record_timestamp is None: active_record_literal = "NULL" - is_active_clause = f"{to} IS NULL" + is_active = f"{to} IS NULL" else: # it's a datetime active_record_literal = format_datetime_literal( active_record_timestamp, caps.timestamp_precision ) - is_active_clause = f"{to} = {active_record_literal}" + is_active = f"{to} = {active_record_literal}" - # retire updated and deleted records - sql.append(f""" + # retire records: + # - no `merge_key`: retire all absent records + # - yes `merge_key`: retire those absent records whose `merge_key` + # is present in staging data + retire_sql = f""" {cls.gen_update_table_prefix(root_table_name)} {to} = {boundary_literal} - WHERE {is_active_clause} + WHERE {is_active} AND {hash_} NOT IN (SELECT {hash_} FROM {staging_root_table_name}); - """) + """ + merge_keys = cls._escape_list( + get_columns_names_with_prop(root_table, "merge_key"), + escape_column_id, + ) + if len(merge_keys) > 0: + if len(merge_keys) == 1: + key = merge_keys[0] + else: + key = cls.gen_concat_sql(merge_keys) # compound key + key_present = f"{key} IN (SELECT {key} FROM {staging_root_table_name})" + retire_sql = retire_sql.rstrip()[:-1] # remove semicolon + retire_sql += f" AND {key_present};" + sql.append(retire_sql) # insert new active records in root table columns = map(escape_column_id, list(root_table["columns"].keys())) @@ -776,7 +796,7 @@ def gen_scd2_sql( INSERT INTO {root_table_name} ({col_str}, {from_}, {to}) SELECT {col_str}, {boundary_literal} AS {from_}, {active_record_literal} AS {to} FROM {staging_root_table_name} AS s - WHERE {hash_} NOT IN (SELECT {hash_} FROM {root_table_name} WHERE {is_active_clause}); + WHERE {hash_} NOT IN (SELECT {hash_} FROM {root_table_name} WHERE {is_active}); """) # insert list elements for new active records in nested tables diff --git a/dlt/extract/hints.py b/dlt/extract/hints.py index 037ebbddf9..2774e17353 100644 --- a/dlt/extract/hints.py +++ b/dlt/extract/hints.py @@ -12,6 +12,7 @@ TTableSchemaColumns, TWriteDispositionConfig, TMergeDispositionDict, + TScd2StrategyDict, TAnySchemaColumns, TTableFormat, TSchemaContract, @@ -352,7 +353,7 @@ def _set_hints( 
self, hints_template: TResourceHints, create_table_variant: bool = False ) -> None: DltResourceHints.validate_dynamic_hints(hints_template) - DltResourceHints.validate_write_disposition_hint(hints_template.get("write_disposition")) + DltResourceHints.validate_write_disposition_hint(hints_template) if create_table_variant: table_name: str = hints_template["name"] # type: ignore[assignment] # incremental cannot be specified in variant @@ -452,10 +453,11 @@ def _merge_merge_disposition_dict(dict_: Dict[str, Any]) -> None: md_dict: TMergeDispositionDict = dict_.pop("write_disposition") if merge_strategy := md_dict.get("strategy"): dict_["x-merge-strategy"] = merge_strategy - if "boundary_timestamp" in md_dict: - dict_["x-boundary-timestamp"] = md_dict["boundary_timestamp"] - # add columns for `scd2` merge strategy + if merge_strategy == "scd2": + md_dict = cast(TScd2StrategyDict, md_dict) + if "boundary_timestamp" in md_dict: + dict_["x-boundary-timestamp"] = md_dict["boundary_timestamp"] if md_dict.get("validity_column_names") is None: from_, to = DEFAULT_VALIDITY_COLUMN_NAMES else: @@ -514,7 +516,8 @@ def validate_dynamic_hints(template: TResourceHints) -> None: ) @staticmethod - def validate_write_disposition_hint(wd: TTableHintTemplate[TWriteDispositionConfig]) -> None: + def validate_write_disposition_hint(template: TResourceHints) -> None: + wd = template.get("write_disposition") if isinstance(wd, dict) and wd["disposition"] == "merge": wd = cast(TMergeDispositionDict, wd) if "strategy" in wd and wd["strategy"] not in MERGE_STRATEGIES: @@ -523,13 +526,18 @@ def validate_write_disposition_hint(wd: TTableHintTemplate[TWriteDispositionConf f"""Allowed values: {', '.join(['"' + s + '"' for s in MERGE_STRATEGIES])}.""" ) - for ts in ("active_record_timestamp", "boundary_timestamp"): - if ts == "active_record_timestamp" and wd.get("active_record_timestamp") is None: - continue # None is allowed for active_record_timestamp - if ts in wd: - try: - ensure_pendulum_datetime(wd[ts]) # type: ignore[literal-required] - except Exception: - raise ValueError( - f'could not parse `{ts}` value "{wd[ts]}"' # type: ignore[literal-required] - ) + if wd.get("strategy") == "scd2": + wd = cast(TScd2StrategyDict, wd) + for ts in ("active_record_timestamp", "boundary_timestamp"): + if ( + ts == "active_record_timestamp" + and wd.get("active_record_timestamp") is None + ): + continue # None is allowed for active_record_timestamp + if ts in wd: + try: + ensure_pendulum_datetime(wd[ts]) # type: ignore[literal-required] + except Exception: + raise ValueError( + f'could not parse `{ts}` value "{wd[ts]}"' # type: ignore[literal-required] + ) diff --git a/docs/website/docs/general-usage/incremental-loading.md b/docs/website/docs/general-usage/incremental-loading.md index 88f009e3c2..c8f92cf154 100644 --- a/docs/website/docs/general-usage/incremental-loading.md +++ b/docs/website/docs/general-usage/incremental-loading.md @@ -223,7 +223,7 @@ info = pipeline.run(fb_ads.with_resources("ads"), write_disposition="merge") In the example above, we enforce the root key propagation with `fb_ads.root_key = True`. This ensures that the correct data is propagated on the initial `replace` load so the future `merge` load can be executed. You can achieve the same in the decorator `@dlt.source(root_key=True)`. ### `scd2` strategy -`dlt` can create [Slowly Changing Dimension Type 2](https://en.wikipedia.org/wiki/Slowly_changing_dimension#Type_2:_add_new_row) (SCD2) destination tables for dimension tables that change in the source. 
The resource is expected to provide a full extract of the source table each run. A row hash is stored in `_dlt_id` and used as a surrogate key to identify source records that have been inserted, updated, or deleted. A `NULL` value is used by default to indicate an active record, but it's possible to use a configurable high timestamp (e.g., 9999-12-31 00:00:00.000000) instead. +`dlt` can create [Slowly Changing Dimension Type 2](https://en.wikipedia.org/wiki/Slowly_changing_dimension#Type_2:_add_new_row) (SCD2) destination tables for dimension tables that change in the source. By default, the resource is expected to provide a full extract of the source table each run, but [incremental extracts](#example-incremental-scd2) are also possible. A row hash is stored in `_dlt_id` and used as surrogate key to identify source records that have been inserted, updated, or deleted. A `NULL` value is used by default to indicate an active record, but it's possible to use a configurable high timestamp (e.g. 9999-12-31 00:00:00.000000) instead. :::note The `unique` hint for `_dlt_id` in the root table is set to `false` when using `scd2`. This differs from [default behavior](./destination-tables.md#child-and-parent-tables). The reason is that the surrogate key stored in `_dlt_id` contains duplicates after an _insert-delete-reinsert_ pattern: @@ -300,6 +300,131 @@ pipeline.run(dim_customer()) # third run — 2024-04-10 06:45:22.847403 | 2024-04-09 18:27:53.734235 | **2024-04-10 06:45:22.847403** | 2 | bar | 2 | | 2024-04-09 22:13:07.943703 | NULL | 1 | foo_updated | 1 | +#### Example: incremental `scd2` +A `merge_key` can be provided to work with incremental extracts instead of full extracts. The `merge_key` lets you define which absent rows are considered "deleted". Compound natural keys are allowed and can be specified by providing a list of column names as `merge_key`. + +*Case 1: do not retire absent records* + +You can set the natural key as `merge_key` to prevent retirement of absent rows. In this case you don't consider any absent row deleted. Records are not retired in the destination if their corresponding natural keys are not present in the source extract. This allows for incremental extracts that only contain updated records. + +```py +@dlt.resource( + merge_key="customer_key", + write_disposition={"disposition": "merge", "strategy": "scd2"} +) +def dim_customer(): + # initial load + yield [ + {"customer_key": 1, "c1": "foo", "c2": 1}, + {"customer_key": 2, "c1": "bar", "c2": 2} + ] + +pipeline.run(dim_customer()) # first run — 2024-04-09 18:27:53.734235 +... +``` +*`dim_customer` destination table after first run:* + +| `_dlt_valid_from` | `_dlt_valid_to` | `customer_key` | `c1` | `c2` | +| -- | -- | -- | -- | -- | +| 2024-04-09 18:27:53.734235 | NULL | 1 | foo | 1 | +| 2024-04-09 18:27:53.734235 | NULL | 2 | bar | 2 | + +```py +... 
+def dim_customer(): + # second load — record for customer_key 1 got updated, customer_key 2 absent + yield [ + {"customer_key": 1, "c1": "foo_updated", "c2": 1}, +] + +pipeline.run(dim_customer()) # second run — 2024-04-09 22:13:07.943703 +``` + +*`dim_customer` destination table after second run—customer key 2 was not retired:* + +| `_dlt_valid_from` | `_dlt_valid_to` | `customer_key` | `c1` | `c2` | +| -- | -- | -- | -- | -- | +| 2024-04-09 18:27:53.734235 | **2024-04-09 22:13:07.943703** | 1 | foo | 1 | +| 2024-04-09 18:27:53.734235 | NULL | 2 | bar | 2 | +| **2024-04-09 22:13:07.943703** | **NULL** | **1** | **foo_updated** | **1** | + +*Case 2: only retire records for given partitions* + +:::note +Technically this is not SCD2 because the key used to merge records is not a natural key. +::: + +You can set a "partition" column as `merge_key` to retire absent rows for given partitions. In this case you only consider absent rows deleted if their partition value is present in the extract. Physical partitioning of the table is not required—the word "partition" is used conceptually here. + +```py +@dlt.resource( + merge_key="date", + write_disposition={"disposition": "merge", "strategy": "scd2"} +) +def some_data(): + # load 1 — "2024-01-01" partition + yield [ + {"date": "2024-01-01", "name": "a"}, + {"date": "2024-01-01", "name": "b"}, + ] + +pipeline.run(some_data()) # first run — 2024-01-02 03:03:35.854305 +... +``` + +*`some_data` destination table after first run:* + +| `_dlt_valid_from` | `_dlt_valid_to` | `date` | `name` | +| -- | -- | -- | -- | +| 2024-01-02 03:03:35.854305 | NULL | 2024-01-01 | a | +| 2024-01-02 03:03:35.854305 | NULL | 2024-01-01 | b | + +```py +... +def some_data(): + # load 2 — "2024-01-02" partition + yield [ + {"date": "2024-01-02", "name": "c"}, + {"date": "2024-01-02", "name": "d"}, + ] + +pipeline.run(some_data()) # second run — 2024-01-03 03:01:11.943703 +... +``` + +*`some_data` destination table after second run—added 2024-01-02 records, did not touch 2024-01-01 records:* + +| `_dlt_valid_from` | `_dlt_valid_to` | `date` | `name` | +| -- | -- | -- | -- | +| 2024-01-02 03:03:35.854305 | NULL | 2024-01-01 | a | +| 2024-01-02 03:03:35.854305 | NULL | 2024-01-01 | b | +| **2024-01-03 03:01:11.943703** | **NULL** | **2024-01-02** | **c** | +| **2024-01-03 03:01:11.943703** | **NULL** | **2024-01-02** | **d** | + +```py +... +def some_data(): + # load 3 — reload "2024-01-01" partition + yield [ + {"date": "2024-01-01", "name": "a"}, # unchanged + {"date": "2024-01-01", "name": "bb"}, # new + ] + +pipeline.run(some_data()) # third run — 2024-01-03 10:30:05.750356 +... +``` + +*`some_data` destination table after third run—retired b, added bb, did not touch 2024-01-02 partition:* + +| `_dlt_valid_from` | `_dlt_valid_to` | `date` | `name` | +| -- | -- | -- | -- | +| 2024-01-02 03:03:35.854305 | NULL | 2024-01-01 | a | +| 2024-01-02 03:03:35.854305 | **2024-01-03 10:30:05.750356** | 2024-01-01 | b | +| 2024-01-03 03:01:11.943703 | NULL | 2024-01-02 | c | +| 2024-01-03 03:01:11.943703 | NULL | 2024-01-02 | d | +| **2024-01-03 10:30:05.750356** | **NULL** | **2024-01-01** | **bb** | + + #### Example: configure validity column names `_dlt_valid_from` and `_dlt_valid_to` are used by default as validity column names. 
Other names can be configured as follows: ```py diff --git a/tests/common/test_validation.py b/tests/common/test_validation.py index 0ecbbea89d..3f8ccfc20f 100644 --- a/tests/common/test_validation.py +++ b/tests/common/test_validation.py @@ -334,8 +334,8 @@ def test_typeddict_friendly_exceptions() -> None: wrong_dict["write_disposition"] = {"strategy": "scd2"} validate_dict(EndpointResource, wrong_dict, ".") print(e.value) - # Union of 3 types and callable - assert len(e.value.nested_exceptions) == 4 + # Union of 4 types and callable + assert len(e.value.nested_exceptions) == 5 # this has wrong disposition string with pytest.raises(DictValidationException) as e: @@ -343,8 +343,8 @@ def test_typeddict_friendly_exceptions() -> None: wrong_dict["write_disposition"] = "unknown" # type: ignore[assignment] validate_dict(EndpointResource, wrong_dict, ".") print(e.value) - # Union of 3 types and callable - assert len(e.value.nested_exceptions) == 4 + # Union of 4 types and callable + assert len(e.value.nested_exceptions) == 5 # this has wrong nested type with pytest.raises(DictValidationException) as e: diff --git a/tests/load/pipeline/test_scd2.py b/tests/load/pipeline/test_scd2.py index c75ff4d3e6..3e08b792ed 100644 --- a/tests/load/pipeline/test_scd2.py +++ b/tests/load/pipeline/test_scd2.py @@ -9,13 +9,12 @@ from dlt.common.typing import TAnyDateTime from dlt.common.pendulum import pendulum from dlt.common.pipeline import LoadInfo -from dlt.common.schema.exceptions import ColumnNameConflictException +from dlt.common.data_types.typing import TDataType from dlt.common.schema.typing import DEFAULT_VALIDITY_COLUMN_NAMES from dlt.common.normalizers.json.relational import DataItemNormalizer from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeCaseNamingConvention from dlt.common.time import ensure_pendulum_datetime, reduce_pendulum_datetime_precision from dlt.extract.resource import DltResource -from dlt.pipeline.exceptions import PipelineStepFailed from tests.cases import arrow_table_all_data_types from tests.load.utils import ( @@ -32,6 +31,7 @@ from tests.utils import TPythonTableFormat get_row_hash = DataItemNormalizer.get_row_hash +FROM, TO = DEFAULT_VALIDITY_COLUMN_NAMES def get_load_package_created_at(pipeline: dlt.Pipeline, load_info: LoadInfo) -> datetime: @@ -74,40 +74,21 @@ def get_table( @pytest.mark.essential @pytest.mark.parametrize( - "destination_config,simple,validity_column_names,active_record_timestamp", - # test basic cases for alle SQL destinations supporting merge - [ - (dconf, True, None, None) - for dconf in destinations_configs(default_sql_configs=True, supports_merge=True) - ] - + [ - (dconf, True, None, pendulum.DateTime(2099, 12, 31, 22, 2, 59)) # arbitrary timestamp - for dconf in destinations_configs(default_sql_configs=True, supports_merge=True) - ] - + [ # test nested columns and validity column name configuration only for postgres and duckdb - (dconf, False, ["from", "to"], None) - for dconf in destinations_configs(default_sql_configs=True, subset=["postgres", "duckdb"]) - ] - + [ - (dconf, False, ["ValidFrom", "ValidTo"], None) - for dconf in destinations_configs(default_sql_configs=True, subset=["postgres", "duckdb"]) - ], - ids=lambda x: ( - x.name - if isinstance(x, DestinationTestConfiguration) - else (x[0] + "-" + x[1] if isinstance(x, list) else x) - ), + "destination_config", + destinations_configs(default_sql_configs=True, supports_merge=True), + ids=lambda x: x.name, +) +@pytest.mark.parametrize( + "validity_column_names", + [None, 
["from", "to"], ["ValidFrom", "ValidTo"]], + ids=lambda x: x[0] + "-" + x[1] if isinstance(x, list) else x, ) def test_core_functionality( destination_config: DestinationTestConfiguration, - simple: bool, validity_column_names: List[str], - active_record_timestamp: Optional[pendulum.DateTime], ) -> None: - # somehow destination_config comes through as ParameterSet instead of - # DestinationTestConfiguration - destination_config = destination_config.values[0] # type: ignore[attr-defined] - + if validity_column_names is not None and destination_config.destination_type != "postgres": + pytest.skip("test `validity_column_names` configuration only for `postgres`") p = destination_config.setup_pipeline("abstract", dev_mode=True) @dlt.resource( @@ -116,7 +97,6 @@ def test_core_functionality( "disposition": "merge", "strategy": "scd2", "validity_column_names": validity_column_names, - "active_record_timestamp": active_record_timestamp, }, ) def r(data): @@ -131,8 +111,8 @@ def r(data): # load 1 — initial load dim_snap = [ - {"nk": 1, "c1": "foo", "c2": "foo" if simple else {"nc1": "foo"}}, - {"nk": 2, "c1": "bar", "c2": "bar" if simple else {"nc1": "bar"}}, + {"nk": 1, "c1": "foo", "c2": {"nc1": "foo"}}, + {"nk": 2, "c1": "bar", "c2": {"nc1": "bar"}}, ] info = p.run(r(dim_snap), **destination_config.run_kwargs) assert_load_info(info) @@ -148,93 +128,92 @@ def r(data): # assert load results ts_1 = get_load_package_created_at(p, info) assert_load_info(info) - cname = "c2" if simple else "c2__nc1" - assert get_table(p, "dim_test", cname) == [ + assert get_table(p, "dim_test", "c2__nc1") == [ { from_: ts_1, - to: active_record_timestamp, + to: None, "nk": 2, "c1": "bar", - cname: "bar", + "c2__nc1": "bar", }, { from_: ts_1, - to: active_record_timestamp, + to: None, "nk": 1, "c1": "foo", - cname: "foo", + "c2__nc1": "foo", }, ] # load 2 — update a record dim_snap = [ - {"nk": 1, "c1": "foo", "c2": "foo_updated" if simple else {"nc1": "foo_updated"}}, - {"nk": 2, "c1": "bar", "c2": "bar" if simple else {"nc1": "bar"}}, + {"nk": 1, "c1": "foo", "c2": {"nc1": "foo_updated"}}, + {"nk": 2, "c1": "bar", "c2": {"nc1": "bar"}}, ] info = p.run(r(dim_snap), **destination_config.run_kwargs) ts_2 = get_load_package_created_at(p, info) assert_load_info(info) - assert get_table(p, "dim_test", cname) == [ + assert get_table(p, "dim_test", "c2__nc1") == [ { from_: ts_1, - to: active_record_timestamp, + to: None, "nk": 2, "c1": "bar", - cname: "bar", + "c2__nc1": "bar", }, - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo", cname: "foo"}, + {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo", "c2__nc1": "foo"}, { from_: ts_2, - to: active_record_timestamp, + to: None, "nk": 1, "c1": "foo", - cname: "foo_updated", + "c2__nc1": "foo_updated", }, ] # load 3 — delete a record dim_snap = [ - {"nk": 1, "c1": "foo", "c2": "foo_updated" if simple else {"nc1": "foo_updated"}}, + {"nk": 1, "c1": "foo", "c2": {"nc1": "foo_updated"}}, ] info = p.run(r(dim_snap), **destination_config.run_kwargs) ts_3 = get_load_package_created_at(p, info) assert_load_info(info) - assert get_table(p, "dim_test", cname) == [ - {from_: ts_1, to: ts_3, "nk": 2, "c1": "bar", cname: "bar"}, - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo", cname: "foo"}, + assert get_table(p, "dim_test", "c2__nc1") == [ + {from_: ts_1, to: ts_3, "nk": 2, "c1": "bar", "c2__nc1": "bar"}, + {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo", "c2__nc1": "foo"}, { from_: ts_2, - to: active_record_timestamp, + to: None, "nk": 1, "c1": "foo", - cname: "foo_updated", + "c2__nc1": "foo_updated", 
}, ] # load 4 — insert a record dim_snap = [ - {"nk": 1, "c1": "foo", "c2": "foo_updated" if simple else {"nc1": "foo_updated"}}, - {"nk": 3, "c1": "baz", "c2": "baz" if simple else {"nc1": "baz"}}, + {"nk": 1, "c1": "foo", "c2": {"nc1": "foo_updated"}}, + {"nk": 3, "c1": "baz", "c2": {"nc1": "baz"}}, ] info = p.run(r(dim_snap), **destination_config.run_kwargs) ts_4 = get_load_package_created_at(p, info) assert_load_info(info) - assert get_table(p, "dim_test", cname) == [ - {from_: ts_1, to: ts_3, "nk": 2, "c1": "bar", cname: "bar"}, + assert get_table(p, "dim_test", "c2__nc1") == [ + {from_: ts_1, to: ts_3, "nk": 2, "c1": "bar", "c2__nc1": "bar"}, { from_: ts_4, - to: active_record_timestamp, + to: None, "nk": 3, "c1": "baz", - cname: "baz", + "c2__nc1": "baz", }, - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo", cname: "foo"}, + {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo", "c2__nc1": "foo"}, { from_: ts_2, - to: active_record_timestamp, + to: None, "nk": 1, "c1": "foo", - cname: "foo_updated", + "c2__nc1": "foo_updated", }, ] @@ -255,9 +234,6 @@ def test_child_table(destination_config: DestinationTestConfiguration, simple: b def r(data): yield data - # get validity column names - from_, to = DEFAULT_VALIDITY_COLUMN_NAMES - # load 1 — initial load dim_snap: List[Dict[str, Any]] = [ l1_1 := {"nk": 1, "c1": "foo", "c2": [1] if simple else [{"cc1": 1}]}, @@ -267,8 +243,8 @@ def r(data): ts_1 = get_load_package_created_at(p, info) assert_load_info(info) assert get_table(p, "dim_test", "c1") == [ - {from_: ts_1, to: None, "nk": 2, "c1": "bar"}, - {from_: ts_1, to: None, "nk": 1, "c1": "foo"}, + {FROM: ts_1, TO: None, "nk": 2, "c1": "bar"}, + {FROM: ts_1, TO: None, "nk": 1, "c1": "foo"}, ] cname = "value" if simple else "cc1" assert get_table(p, "dim_test__c2", cname) == [ @@ -286,9 +262,9 @@ def r(data): ts_2 = get_load_package_created_at(p, info) assert_load_info(info) assert get_table(p, "dim_test", "c1") == [ - {from_: ts_1, to: None, "nk": 2, "c1": "bar"}, - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo"}, # updated - {from_: ts_2, to: None, "nk": 1, "c1": "foo_updated"}, # new + {FROM: ts_1, TO: None, "nk": 2, "c1": "bar"}, + {FROM: ts_1, TO: ts_2, "nk": 1, "c1": "foo"}, # updated + {FROM: ts_2, TO: None, "nk": 1, "c1": "foo_updated"}, # new ] assert_records_as_set( get_table(p, "dim_test__c2"), @@ -315,10 +291,10 @@ def r(data): assert_records_as_set( get_table(p, "dim_test"), [ - {from_: ts_1, to: None, "nk": 2, "c1": "bar"}, - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo"}, - {from_: ts_2, to: ts_3, "nk": 1, "c1": "foo_updated"}, # updated - {from_: ts_3, to: None, "nk": 1, "c1": "foo_updated"}, # new + {FROM: ts_1, TO: None, "nk": 2, "c1": "bar"}, + {FROM: ts_1, TO: ts_2, "nk": 1, "c1": "foo"}, + {FROM: ts_2, TO: ts_3, "nk": 1, "c1": "foo_updated"}, # updated + {FROM: ts_3, TO: None, "nk": 1, "c1": "foo_updated"}, # new ], ) exp_3 = [ @@ -341,10 +317,10 @@ def r(data): assert_records_as_set( get_table(p, "dim_test"), [ - {from_: ts_1, to: ts_4, "nk": 2, "c1": "bar"}, # updated - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo"}, - {from_: ts_2, to: ts_3, "nk": 1, "c1": "foo_updated"}, - {from_: ts_3, to: None, "nk": 1, "c1": "foo_updated"}, + {FROM: ts_1, TO: ts_4, "nk": 2, "c1": "bar"}, # updated + {FROM: ts_1, TO: ts_2, "nk": 1, "c1": "foo"}, + {FROM: ts_2, TO: ts_3, "nk": 1, "c1": "foo_updated"}, + {FROM: ts_3, TO: None, "nk": 1, "c1": "foo_updated"}, ], ) assert_records_as_set( @@ -362,11 +338,11 @@ def r(data): assert_records_as_set( get_table(p, "dim_test"), [ - {from_: ts_1, to: ts_4, 
"nk": 2, "c1": "bar"}, - {from_: ts_5, to: None, "nk": 3, "c1": "baz"}, # new - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo"}, - {from_: ts_2, to: ts_3, "nk": 1, "c1": "foo_updated"}, - {from_: ts_3, to: None, "nk": 1, "c1": "foo_updated"}, + {FROM: ts_1, TO: ts_4, "nk": 2, "c1": "bar"}, + {FROM: ts_5, TO: None, "nk": 3, "c1": "baz"}, # new + {FROM: ts_1, TO: ts_2, "nk": 1, "c1": "foo"}, + {FROM: ts_2, TO: ts_3, "nk": 1, "c1": "foo_updated"}, + {FROM: ts_3, TO: None, "nk": 1, "c1": "foo_updated"}, ], ) assert_records_as_set( @@ -519,13 +495,12 @@ def r(data): ts_3 = get_load_package_created_at(p, info) # assert parent records - from_, to = DEFAULT_VALIDITY_COLUMN_NAMES r1_no_child = {k: v for k, v in r1.items() if k != "child"} r2_no_child = {k: v for k, v in r2.items() if k != "child"} expected = [ - {**{from_: ts_1, to: ts_2}, **r1_no_child}, - {**{from_: ts_3, to: None}, **r1_no_child}, - {**{from_: ts_1, to: None}, **r2_no_child}, + {**{FROM: ts_1, TO: ts_2}, **r1_no_child}, + {**{FROM: ts_3, TO: None}, **r1_no_child}, + {**{FROM: ts_1, TO: None}, **r2_no_child}, ] assert_records_as_set(get_table(p, "dim_test"), expected) @@ -653,10 +628,9 @@ def r(data): info = p.run(r(dim_snap), **destination_config.run_kwargs) assert_load_info(info) assert load_table_counts(p, "dim_test")["dim_test"] == 2 - from_, to = DEFAULT_VALIDITY_COLUMN_NAMES expected = [ - {**{from_: strip_timezone(ts1), to: None}, **l1_1}, - {**{from_: strip_timezone(ts1), to: None}, **l1_2}, + {**{FROM: strip_timezone(ts1), TO: None}, **l1_1}, + {**{FROM: strip_timezone(ts1), TO: None}, **l1_2}, ] assert get_table(p, "dim_test", "nk") == expected @@ -677,10 +651,10 @@ def r(data): assert_load_info(info) assert load_table_counts(p, "dim_test")["dim_test"] == 4 expected = [ - {**{from_: strip_timezone(ts1), to: strip_timezone(ts2)}, **l1_1}, # retired - {**{from_: strip_timezone(ts1), to: strip_timezone(ts2)}, **l1_2}, # retired - {**{from_: strip_timezone(ts2), to: None}, **l2_1}, # new - {**{from_: strip_timezone(ts2), to: None}, **l2_3}, # new + {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_1}, # retired + {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_2}, # retired + {**{FROM: strip_timezone(ts2), TO: None}, **l2_1}, # new + {**{FROM: strip_timezone(ts2), TO: None}, **l2_3}, # new ] assert_records_as_set(get_table(p, "dim_test"), expected) @@ -699,10 +673,10 @@ def r(data): assert_load_info(info) assert load_table_counts(p, "dim_test")["dim_test"] == 4 expected = [ - {**{from_: strip_timezone(ts1), to: strip_timezone(ts2)}, **l1_1}, # unchanged - {**{from_: strip_timezone(ts1), to: strip_timezone(ts2)}, **l1_2}, # unchanged - {**{from_: strip_timezone(ts2), to: None}, **l2_1}, # unchanged - {**{from_: strip_timezone(ts2), to: strip_timezone(ts3)}, **l2_3}, # retired + {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_1}, # unchanged + {**{FROM: strip_timezone(ts1), TO: strip_timezone(ts2)}, **l1_2}, # unchanged + {**{FROM: strip_timezone(ts2), TO: None}, **l2_1}, # unchanged + {**{FROM: strip_timezone(ts2), TO: strip_timezone(ts3)}, **l2_3}, # retired ] assert_records_as_set(get_table(p, "dim_test"), expected) @@ -717,6 +691,196 @@ def r(data): ) +@pytest.mark.essential +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, supports_merge=True), + ids=lambda x: x.name, +) +def test_merge_key_natural_key( + destination_config: DestinationTestConfiguration, +) -> None: + p = destination_config.setup_pipeline("abstract", dev_mode=True) + 
+ @dlt.resource( + merge_key="nk", + write_disposition={"disposition": "merge", "strategy": "scd2"}, + ) + def dim_test(data): + yield data + + # load 1 — initial load + dim_snap = [ + {"nk": 1, "foo": "foo"}, + {"nk": 2, "foo": "foo"}, + ] + info = p.run(dim_test(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 2 + # both records should be active (i.e. not retired) + assert [row[TO] for row in get_table(p, "dim_test")] == [None, None] + + # load 2 — natural key 2 is absent, natural key 1 is unchanged + dim_snap = [ + {"nk": 1, "foo": "foo"}, + ] + info = p.run(dim_test(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 2 + # both records should still be active + assert [row[TO] for row in get_table(p, "dim_test")] == [None, None] + + # load 3 — natural key 2 is absent, natural key 1 has changed + dim_snap = [ + {"nk": 1, "foo": "bar"}, + ] + info = p.run(dim_test(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 3 + ts3 = get_load_package_created_at(p, info) + # natural key 1 should now have two records (one retired, one active) + actual = [{k: v for k, v in row.items() if k in ("nk", TO)} for row in get_table(p, "dim_test")] + expected = [{"nk": 1, TO: ts3}, {"nk": 1, TO: None}, {"nk": 2, TO: None}] + assert_records_as_set(actual, expected) # type: ignore[arg-type] + + # load 4 — natural key 2 is absent, natural key 1 has changed back to + # initial version + dim_snap = [ + {"nk": 1, "foo": "foo"}, + ] + info = p.run(dim_test(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 4 + ts4 = get_load_package_created_at(p, info) + # natural key 1 should now have three records (two retired, one active) + actual = [{k: v for k, v in row.items() if k in ("nk", TO)} for row in get_table(p, "dim_test")] + expected = [{"nk": 1, TO: ts3}, {"nk": 1, TO: ts4}, {"nk": 1, TO: None}, {"nk": 2, TO: None}] + assert_records_as_set(actual, expected) # type: ignore[arg-type] + + +@pytest.mark.essential +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, supports_merge=True), + ids=lambda x: x.name, +) +@pytest.mark.parametrize("key_type", ("text", "bigint")) +def test_merge_key_compound_natural_key( + destination_config: DestinationTestConfiguration, + key_type: TDataType, +) -> None: + p = destination_config.setup_pipeline("abstract", dev_mode=True) + + @dlt.resource( + merge_key=["first_name", "last_name"], + write_disposition={"disposition": "merge", "strategy": "scd2"}, + ) + def dim_test_compound(data): + yield data + + # vary `first_name` type to test mixed compound `merge_key` + if key_type == "text": + first_name = "John" + elif key_type == "bigint": + first_name = 1 # type: ignore[assignment] + # load 1 — initial load + dim_snap = [ + {"first_name": first_name, "last_name": "Doe", "age": 20}, + {"first_name": first_name, "last_name": "Dodo", "age": 20}, + ] + info = p.run(dim_test_compound(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test_compound")["dim_test_compound"] == 2 + # both records should be active (i.e. 
not retired) + assert [row[TO] for row in get_table(p, "dim_test_compound")] == [None, None] + + # load 2 — "Dodo" is absent, "Doe" has changed + dim_snap = [ + {"first_name": first_name, "last_name": "Doe", "age": 30}, + ] + info = p.run(dim_test_compound(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test_compound")["dim_test_compound"] == 3 + ts3 = get_load_package_created_at(p, info) + # "Doe" should now have two records (one retired, one active) + actual = [ + {k: v for k, v in row.items() if k in ("first_name", "last_name", TO)} + for row in get_table(p, "dim_test_compound") + ] + expected = [ + {"first_name": first_name, "last_name": "Doe", TO: ts3}, + {"first_name": first_name, "last_name": "Doe", TO: None}, + {"first_name": first_name, "last_name": "Dodo", TO: None}, + ] + assert_records_as_set(actual, expected) # type: ignore[arg-type] + + +@pytest.mark.essential +@pytest.mark.parametrize( + "destination_config", + destinations_configs(default_sql_configs=True, supports_merge=True), + ids=lambda x: x.name, +) +def test_merge_key_partition( + destination_config: DestinationTestConfiguration, +) -> None: + p = destination_config.setup_pipeline("abstract", dev_mode=True) + + @dlt.resource( + merge_key="date", + write_disposition={"disposition": "merge", "strategy": "scd2"}, + ) + def dim_test(data): + yield data + + # load 1 — "2024-01-01" partition + dim_snap = [ + {"date": "2024-01-01", "name": "a"}, + {"date": "2024-01-01", "name": "b"}, + ] + info = p.run(dim_test(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 2 + # both records should be active (i.e. not retired) + assert [row[TO] for row in get_table(p, "dim_test")] == [None, None] + + # load 2 — "2024-01-02" partition + dim_snap = [ + {"date": "2024-01-02", "name": "c"}, + {"date": "2024-01-02", "name": "d"}, + ] + info = p.run(dim_test(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + assert load_table_counts(p, "dim_test")["dim_test"] == 4 + # two "2024-01-01" records should be untouched, two "2024-01-02" records should + # be added + assert [row[TO] for row in get_table(p, "dim_test")] == [None, None, None, None] + + # load 3 — reload "2024-01-01" partition + dim_snap = [ + {"date": "2024-01-01", "name": "a"}, # unchanged + {"date": "2024-01-01", "name": "bb"}, # new + ] + info = p.run(dim_test(dim_snap), **destination_config.run_kwargs) + assert_load_info(info) + # "b" should be retired, "bb" should be added, "2024-01-02" partition + # should be untouched + assert load_table_counts(p, "dim_test")["dim_test"] == 5 + ts2 = get_load_package_created_at(p, info) + actual = [ + {k: v for k, v in row.items() if k in ("date", "name", TO)} + for row in get_table(p, "dim_test") + ] + expected = [ + {"date": "2024-01-01", "name": "a", TO: None}, + {"date": "2024-01-01", "name": "b", TO: ts2}, + {"date": "2024-01-01", "name": "bb", TO: None}, + {"date": "2024-01-02", "name": "c", TO: None}, + {"date": "2024-01-02", "name": "d", TO: None}, + ] + assert_records_as_set(actual, expected) # type: ignore[arg-type] + + @pytest.mark.parametrize( "destination_config", destinations_configs(default_sql_configs=True, subset=["duckdb"]), @@ -750,9 +914,8 @@ def _make_scd2_r(table_: Any) -> DltResource: # make sure we have scd2 columns in schema table_schema = p.default_schema.get_table("tabular") assert table_schema["x-merge-strategy"] == "scd2" # type: ignore[typeddict-item] - from_, to = 
DEFAULT_VALIDITY_COLUMN_NAMES - assert table_schema["columns"][from_]["x-valid-from"] # type: ignore[typeddict-item] - assert table_schema["columns"][to]["x-valid-to"] # type: ignore[typeddict-item] + assert table_schema["columns"][FROM]["x-valid-from"] # type: ignore[typeddict-item] + assert table_schema["columns"][TO]["x-valid-to"] # type: ignore[typeddict-item] assert table_schema["columns"]["row_hash"]["x-row-version"] # type: ignore[typeddict-item] # 100 items in destination assert load_table_counts(p, "tabular")["tabular"] == 100 @@ -816,13 +979,12 @@ def r(data): ts_2 = get_load_package_created_at(p, info) # assert load results - from_, to = DEFAULT_VALIDITY_COLUMN_NAMES assert get_table(p, "dim_test", "c1") == [ - {from_: ts_1, to: ts_2, "nk": 2, "c1": "bar", "row_hash": "mocked_hash_2"}, - {from_: ts_1, to: ts_2, "nk": 1, "c1": "foo", "row_hash": "mocked_hash_1"}, + {FROM: ts_1, TO: ts_2, "nk": 2, "c1": "bar", "row_hash": "mocked_hash_2"}, + {FROM: ts_1, TO: ts_2, "nk": 1, "c1": "foo", "row_hash": "mocked_hash_1"}, { - from_: ts_2, - to: None, + FROM: ts_2, + TO: None, "nk": 1, "c1": "foo_upd", "row_hash": "mocked_hash_1_upd", From 93cd5a6aae1b86c96e18aba5081694a2bc98133a Mon Sep 17 00:00:00 2001 From: Emmanuel Ferdman Date: Mon, 30 Sep 2024 20:05:28 +0300 Subject: [PATCH 06/29] Update weaviate reference (#1896) Signed-off-by: Emmanuel Ferdman --- docs/website/docs/dlt-ecosystem/destinations/weaviate.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md index cce54654b8..214cc3aa4b 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md +++ b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md @@ -305,7 +305,7 @@ Below is an example that configures the **contextionary** vectorizer. You can pu vectorizer="text2vec-contextionary" module_config={text2vec-contextionary = { vectorizeClassName = false, vectorizePropertyName = true}} ``` -You can find Docker Compose with the instructions to run [here](https://github.com/dlt-hub/dlt/tree/devel/dlt/destinations/weaviate/README.md). +You can find Docker Compose with the instructions to run [here](https://github.com/dlt-hub/dlt/tree/devel/dlt/destinations/impl/weaviate/README.md). ### dbt support From 854905fb56576bc608b01b6b047208df888160a7 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Mon, 30 Sep 2024 19:12:08 +0200 Subject: [PATCH 07/29] Expand ENV abbreviation in the docs (#1846) --- .../dlt-ecosystem/destinations/destination.md | 2 +- .../file-formats/_set_the_format.mdx | 2 +- .../docs/general-usage/credentials/setup.md | 30 ++++++++--------- docs/website/docs/tutorial/filesystem.md | 4 +-- .../deploy-a-pipeline/deploy-with-dagster.md | 32 +++++++++---------- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/destination.md b/docs/website/docs/dlt-ecosystem/destinations/destination.md index 7b1e1b23a4..a7f7c5fe16 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/destination.md +++ b/docs/website/docs/dlt-ecosystem/destinations/destination.md @@ -145,7 +145,7 @@ There are multiple ways to pass the custom destination function to the `dlt` pip ) ) ``` -- Via a fully qualified string to function location (can be used from `config.toml` or ENV vars). The destination function should be located in another file. 
+- Via a fully qualified string to function location (this can be set in `config.toml` or through environment variables). The destination function should be located in another file. ```py # File my_pipeline.py diff --git a/docs/website/docs/dlt-ecosystem/file-formats/_set_the_format.mdx b/docs/website/docs/dlt-ecosystem/file-formats/_set_the_format.mdx index e2cce374a2..285f9b0264 100644 --- a/docs/website/docs/dlt-ecosystem/file-formats/_set_the_format.mdx +++ b/docs/website/docs/dlt-ecosystem/file-formats/_set_the_format.mdx @@ -16,7 +16,7 @@ info = pipeline.run(some_source(), loader_file_format="{props.file_type}") loader_file_format="{props.file_type}" -3. You can set the `loader_file_format` via ENV variable: +3. You can set the `loader_file_format` via environment variable:
 export NORMALIZE__LOADER_FILE_FORMAT="{props.file_type}"
diff --git a/docs/website/docs/general-usage/credentials/setup.md b/docs/website/docs/general-usage/credentials/setup.md
index 5f05e68b6d..4210ab5422 100644
--- a/docs/website/docs/general-usage/credentials/setup.md
+++ b/docs/website/docs/general-usage/credentials/setup.md
@@ -45,8 +45,8 @@ The most specific possible path for **sources** looks like:
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
   
@@ -78,8 +78,8 @@ The most specific possible path for **destinations** looks like:
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
   
@@ -285,8 +285,8 @@ Let's assume we have a [notion](../../dlt-ecosystem/verified-sources/notion) sou
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
 
@@ -319,7 +319,7 @@ aws_secret_access_key = "1234567890_access_key" # copy the secret access key her
   
 
 ```sh
-# ENV vars are set up the same way both for configs and secrets
+# Environment variables are set up the same way both for configs and secrets
 export RUNTIME__LOG_LEVEL="INFO"
 export DESTINATION__FILESYSTEM__BUCKET_URL="s3://[your_bucket_name]"
 export NORMALIZE__DATA_WRITER__DISABLE_COMPRESSION="true"
@@ -376,8 +376,8 @@ Let's assume we use the `bigquery` destination and the `google_sheets` source. T
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
 
@@ -424,8 +424,8 @@ os.environ["CREDENTIALS__PROJECT_ID"] = os.environ.get("GOOGLE_PROJECT_ID")
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
 
@@ -506,8 +506,8 @@ Let's assume we have several different Google sources and destinations. We can u
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
 
@@ -590,8 +590,8 @@ Let's assume we have several sources of the same type. How can we separate them
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
 
diff --git a/docs/website/docs/tutorial/filesystem.md b/docs/website/docs/tutorial/filesystem.md
index 6d30eed3e6..b2555db39b 100644
--- a/docs/website/docs/tutorial/filesystem.md
+++ b/docs/website/docs/tutorial/filesystem.md
@@ -112,8 +112,8 @@ Let's specify the bucket URL and credentials. We can do this using the following
   groupId="config-provider-type"
   defaultValue="toml"
   values={[
-    {"label": "Toml config provider", "value": "toml"},
-    {"label": "ENV variables", "value": "env"},
+    {"label": "TOML config provider", "value": "toml"},
+    {"label": "Environment variables", "value": "env"},
     {"label": "In the code", "value": "code"},
 ]}>
 
diff --git a/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-dagster.md b/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-dagster.md
index 14ac18b3e7..e27bb2966a 100644
--- a/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-dagster.md
+++ b/docs/website/docs/walkthroughs/deploy-a-pipeline/deploy-with-dagster.md
@@ -184,17 +184,17 @@ For a complete picture of Dagster's integration with dlt, please refer to their
 ### Frequently Asked Questions
 - **Can I remove the generated `.dlt` folder with `secrets.toml` and `config.toml` files?**
 
-  Yes. Since dlt is compatible with ENV variables, you can use this for secrets required by both Dagster and dlt.
-  
+  Yes. Since dlt is compatible with environment variables, you can use this for secrets required by both Dagster and dlt.
+
 - **I'm working with several sources – how can I best group these assets?**
 
   To effectively group assets in Dagster when working with multiple sources, use the `group_name` parameter in your `@dlt_assets` decorator. This helps organize and visualize assets related to a particular source or theme in the Dagster UI. Here’s a simplified example:
-  
+
   ```py
   import dlt
   from dagster_embedded_elt.dlt import dlt_assets
   from dlt_sources.google_analytics import google_analytics
-  
+
   # Define assets for the first Google Analytics source
   @dlt_assets(
       dlt_source=google_analytics(),
@@ -207,7 +207,7 @@ For a complete picture of Dagster's integration with dlt, please refer to their
   )
   def google_analytics_assets_1(context, dlt):
       yield from dlt.run(context=context)
-  
+
   # Define assets for the second Google Analytics source
   @dlt_assets(
       dlt_source=google_analytics(),
@@ -222,18 +222,18 @@ For a complete picture of Dagster's integration with dlt, please refer to their
       yield from dlt.run(context=context)
   ```
 
- 
-  
+
+
 - **How can I use `bigquery_adapter` with `@dlt_assets` in Dagster for partitioned tables?**
-   
-  To use `bigquery_adapter` with `@dlt_assets` in Dagster for partitioned tables, modify your resource setup to include `bigquery_adapter` with the partition parameter. Here's a quick example:  
-  
+
+  To use `bigquery_adapter` with `@dlt_assets` in Dagster for partitioned tables, modify your resource setup to include `bigquery_adapter` with the partition parameter. Here's a quick example:
+
   ```py
   import dlt
   from google.analytics import BetaAnalyticsDataClient
   from dlt.destinations.adapters import bigquery_adapter
   from dagster import dlt_asset
-  
+
   @dlt_asset
   def google_analytics_asset(context):
       # Configuration (replace with your actual values or parameters)
@@ -244,20 +244,20 @@ For a complete picture of Dagster's integration with dlt, please refer to their
       start_date = "2024-01-01"
       rows_per_page = 1000
       credentials = your_credentials
-  
+
       # Initialize Google Analytics client
       client = BetaAnalyticsDataClient(credentials=credentials.to_native_credentials())
-  
+
       # Fetch metadata
       metadata = get_metadata(client=client, property_id=property_id)
       resource_list = [metadata | metrics_table, metadata | dimensions_table]
-  
+
       # Configure and add resources to the list
       for query in queries:
           dimensions = query["dimensions"]
           if "date" not in dimensions:
               dimensions.append("date")
-  
+
           resource_name = query["resource_name"]
           resource_list.append(
               bigquery_adapter(
@@ -274,7 +274,7 @@ For a complete picture of Dagster's integration with dlt, please refer to their
                   partition="date"
               )
           )
-  
+
       return resource_list
   ```
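Returning to the first question in this FAQ (dropping the generated `.dlt` folder): as a minimal, hypothetical sketch of how a `secrets.toml` entry maps to an environment variable, assume a `postgres` destination — the names below are illustrative and not taken from this patch. dlt resolves `[destination.postgres.credentials] password` from `DESTINATION__POSTGRES__CREDENTIALS__PASSWORD` (keys upper-cased, dots between sections replaced with double underscores), so the value can come from Dagster's runtime environment instead of a checked-in file:

```py
import os

import dlt

# Hypothetical example: this mirrors a secrets.toml entry such as
#   [destination.postgres.credentials]
#   password = "..."
# In a real Dagster deployment the variable would be set on the code location /
# run environment rather than assigned in Python code.
os.environ["DESTINATION__POSTGRES__CREDENTIALS__PASSWORD"] = "example-password"

pipeline = dlt.pipeline(
    pipeline_name="dagster_example",
    destination="postgres",
    dataset_name="example_data",
)
```

The same naming scheme covers source secrets, so both Dagster and dlt can read their configuration from one set of environment variables.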
 

From 2eb8cfe54eb4d6d3a595373ce2551f0ae6089d06 Mon Sep 17 00:00:00 2001
From: Violetta Mishechkina 
Date: Tue, 1 Oct 2024 15:00:58 +0200
Subject: [PATCH 08/29] Docs: Add sftp option for filesystem source (#1845)

---
 .../dlt-ecosystem/destinations/filesystem.md  |  5 ++-
 .../verified-sources/filesystem/basic.md      | 36 +++++++++++++++++--
 .../verified-sources/filesystem/index.md      |  7 ++--
 docs/website/docs/tutorial/filesystem.md      |  2 +-
 docs/website/sidebars.js                      |  2 +-
 5 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
index a456fa6e7d..2be382c326 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
@@ -302,7 +302,10 @@ sftp_gss_deleg_creds  # Delegate credentials with GSS-API, defaults to True
 sftp_gss_host         # Host for GSS-API, defaults to None
 sftp_gss_trust_dns    # Trust DNS for GSS-API, defaults to True
 ```
-> For more information about credentials parameters: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
+
+:::info
+For more information about these credential parameters, see the paramiko documentation: https://docs.paramiko.org/en/3.3/api/client.html#paramiko.client.SSHClient.connect
+:::
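Because these options are passed through to `paramiko.SSHClient.connect` (linked above), one way to sanity-check a credential set before configuring the destination is to open the same connection with paramiko directly. This is only an illustrative sketch — the host name, username, and key path are placeholders, not values from this page:

```py
import paramiko

# Placeholder connection details - replace with your own SFTP server and key.
HOST = "sftp.example.com"
USERNAME = "foo"
KEY_FILENAME = "/path/to/id_rsa"

client = paramiko.SSHClient()
# Auto-accepting unknown host keys keeps the sketch short; verify host keys in real use.
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(HOST, username=USERNAME, key_filename=KEY_FILENAME)

# Listing the remote directory confirms that authentication works.
sftp = client.open_sftp()
print(sftp.listdir("."))
client.close()
```

If this connects and lists files, the same values should work in the corresponding `sftp_*` credential fields.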
 
 ### Authentication methods
 
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md
index 847ff64bf1..6eb02b4edf 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md
@@ -6,7 +6,7 @@ keywords: [readers source and filesystem, files, filesystem, readers source, clo
 import Header from '../_source-info-header.md';
 
-Filesystem source allows loading files from remote locations (AWS S3, Google Cloud Storage, Google Drive, Azure) or the local filesystem seamlessly. Filesystem source natively supports `csv`, `parquet`, and `jsonl` files and allows customization for loading any type of structured files. +Filesystem source allows loading files from remote locations (AWS S3, Google Cloud Storage, Google Drive, Azure Blob Storage, SFTP server) or the local filesystem seamlessly. Filesystem source natively supports `csv`, `parquet`, and `jsonl` files and allows customization for loading any type of structured files. To load unstructured data (`.pdf`, `.txt`, e-mail), please refer to the [unstructured data source](https://github.com/dlt-hub/verified-sources/tree/master/sources/unstructured_data). @@ -75,6 +75,7 @@ To get started with your data pipeline, follow these steps: {"label": "AWS S3", "value": "aws"}, {"label": "GCS/GDrive", "value": "gcp"}, {"label": "Azure", "value": "azure"}, + {"label": "SFTP", "value": "sftp"}, {"label": "Local filesystem", "value": "local"}, ]}> @@ -122,6 +123,18 @@ For more info, see + + +dlt supports several authentication methods: + +1. Key-based authentication +2. SSH Agent-based authentication +3. Username/Password authentication +4. GSS-API authentication + +Learn more about SFTP authentication options in [SFTP section](../../destinations/filesystem#sftp). To obtain credentials, contact your server administrator. + + You don't need any credentials for the local filesystem. @@ -143,6 +156,7 @@ a bucket, can be specified in `config.toml`. {"label": "AWS S3", "value": "aws"}, {"label": "GCS/GDrive", "value": "gcp"}, {"label": "Azure", "value": "azure"}, + {"label": "SFTP", "value": "sftp"}, {"label": "Local filesystem", "value": "local"}, ]}> @@ -195,6 +209,24 @@ bucket_url="gs:////" ``` + + +Learn how to set up SFTP credentials for each authentication method in the [SFTP section](../../destinations/filesystem#sftp). +For example, in case of key-based authentication, you can configure the source the following way: + +```toml +# secrets.toml +[sources.filesystem.credentials] +sftp_username = "foo" +sftp_key_filename = "/path/to/id_rsa" # Replace with the path to your private key file +sftp_key_passphrase = "your_passphrase" # Optional: passphrase for your private key + +# config.toml +[sources.filesystem] # use [sources.readers.credentials] for the "readers" source +bucket_url = "sftp://[hostname]/[path]" +``` + + You can use both native local filesystem paths and `file://` URI. Absolute, relative, and UNC Windows paths are supported. @@ -219,7 +251,7 @@ bucket_url='~\Documents\csv_files\' You can also specify the credentials using Environment variables. The name of the corresponding environment -variable should be slightly different than the corresponding name in the `toml` file. Simply replace dots `.` with double +variable should be slightly different from the corresponding name in the `toml` file. 
Simply replace dots `.` with double underscores `__`: ```sh diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md index 32e0df77c2..1441931340 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md @@ -1,6 +1,6 @@ --- -title: Filesystem & Buckets -description: dlt-verified source for Filesystem & Buckets +title: Filesystem & cloud storage +description: dlt-verified source for Filesystem & cloud storage keywords: [readers source and filesystem, files, filesystem, readers source, cloud storage] --- @@ -8,7 +8,8 @@ The Filesystem source allows seamless loading of files from the following locati * AWS S3 * Google Cloud Storage * Google Drive -* Azure +* Azure Blob Storage +* remote filesystem (via SFTP) * local filesystem The Filesystem source natively supports `csv`, `parquet`, and `jsonl` files and allows customization for loading any type of structured files. diff --git a/docs/website/docs/tutorial/filesystem.md b/docs/website/docs/tutorial/filesystem.md index b2555db39b..f939cc1f4f 100644 --- a/docs/website/docs/tutorial/filesystem.md +++ b/docs/website/docs/tutorial/filesystem.md @@ -4,7 +4,7 @@ description: Learn how to load data files like JSON, JSONL, CSV, and Parquet fro keywords: [dlt, tutorial, filesystem, cloud storage, file system, python, data pipeline, incremental loading, json, jsonl, csv, parquet, duckdb] --- -This tutorial is for you if you need to load data files like JSONL, CSV, and Parquet from either Cloud Storage (e.g., AWS S3, Google Cloud Storage, Google Drive, Azure Blob Storage) or a local file system. +This tutorial is for you if you need to load data files like JSONL, CSV, and Parquet from either Cloud Storage (e.g., AWS S3, Google Cloud Storage, Google Drive, Azure Blob Storage), a remote (SFTP), or a local file system. ## What you will learn diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index 7e6000a2ca..32bb554842 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -67,7 +67,7 @@ const sidebars = { { type: 'category', label: 'Filesystem & cloud storage', - description: 'AWS S3, Google Cloud Storage, Azure Blob Storage, local file system', + description: 'AWS S3, Google Cloud Storage, Azure, SFTP, local file system', link: { type: 'doc', id: 'dlt-ecosystem/verified-sources/filesystem/index', From e21ab01ba463b983c96937dafc78b4c783969083 Mon Sep 17 00:00:00 2001 From: erik james mason Date: Tue, 1 Oct 2024 06:06:42 -0700 Subject: [PATCH 09/29] Fix a typo in installation.md (#1899) --- docs/website/docs/reference/installation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/website/docs/reference/installation.md b/docs/website/docs/reference/installation.md index a19e01ae80..e64e691c20 100644 --- a/docs/website/docs/reference/installation.md +++ b/docs/website/docs/reference/installation.md @@ -109,7 +109,7 @@ C:\> .\env\Scripts\activate You can now install `dlt` in your virtual environment by running: ```sh -# install the newest dlt version or upgrade the exisint version to the newest one +# install the newest dlt version or upgrade the existing version to the newest one pip install -U dlt ``` @@ -143,4 +143,4 @@ You are now ready to build your first pipeline with `dlt`. 
Check out these tutor - [Load data from a SQL database](../tutorial/sql-database) - [Load data from a cloud storage or a file system](../tutorial/filesystem) -Or read a more detailed tutorial on how to build a [custom data pipeline with dlt](../tutorial/load-data-from-an-api.md). \ No newline at end of file +Or read a more detailed tutorial on how to build a [custom data pipeline with dlt](../tutorial/load-data-from-an-api.md). From cb9bbd96adc0fae8a88cef93e0f92bb5686613d6 Mon Sep 17 00:00:00 2001 From: David Scharf Date: Tue, 1 Oct 2024 16:25:57 +0200 Subject: [PATCH 10/29] fix grammar pages 80-100 (#1906) * fix grammar pages 80-100 * Update docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md Co-authored-by: Violetta Mishechkina * Update docs/website/docs/dlt-ecosystem/verified-sources/freshdesk.md Co-authored-by: Violetta Mishechkina --------- Co-authored-by: Violetta Mishechkina --- .../verified-sources/airtable.md | 33 ++-- .../dlt-ecosystem/verified-sources/asana.md | 54 ++---- .../dlt-ecosystem/verified-sources/chess.md | 41 ++-- .../verified-sources/facebook_ads.md | 181 +++++++----------- .../verified-sources/freshdesk.md | 32 ++-- .../verified-sources/google_ads.md | 49 ++--- .../dlt-ecosystem/verified-sources/hubspot.md | 74 +++---- .../dlt-ecosystem/verified-sources/index.md | 7 +- .../dlt-ecosystem/verified-sources/mongodb.md | 55 +++--- .../dlt-ecosystem/verified-sources/mux.md | 19 +- .../verified-sources/pg_replication.md | 26 +-- .../verified-sources/salesforce.md | 103 ++++------ .../dlt-ecosystem/verified-sources/shopify.md | 86 ++++----- .../verified-sources/sql_database/index.md | 12 +- .../verified-sources/sql_database/setup.md | 13 +- .../sql_database/troubleshooting.md | 31 +-- .../verified-sources/sql_database/usage.md | 16 +- .../dlt-ecosystem/verified-sources/strapi.md | 9 +- .../verified-sources/workable.md | 76 ++++---- .../dlt-ecosystem/verified-sources/zendesk.md | 73 +++---- 20 files changed, 433 insertions(+), 557 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md b/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md index 43d99a02fd..a2e15bfd75 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md @@ -14,20 +14,20 @@ data management and collaboration. This Airtable `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/airtable_pipeline.py) -loads data using “Airtable API” to the destination of your choice. +loads data using the “Airtable API” to the destination of your choice. Sources and resources that can be loaded using this verified source are: | Name | Description | | ----------------- |--------------------------------------------| | airtable_source | Retrieves tables from an Airtable base | -| airtable_resource | Retrives data from a single Airtable table | +| airtable_resource | Retrieves data from a single Airtable table | -## Setup Guide +## Setup guide ### Grab Airtable personal access tokens -1. Click your account icon top-right. +1. Click your account icon in the top-right. 1. Choose "Developer Hub" from the dropdown. 1. Select "Personal access token" on the left, then "Create new token". 1. Name it appropriately. @@ -90,16 +90,16 @@ For more information, read the guide on [how to add a verified source.](../../wa ```toml [sources.airtable] - access_token = "Please set me up!" # please set me up! + access_token = "Please set me up!" # Please set me up! 
``` 1. Finally, enter credentials for your chosen destination as per the [docs](../destinations/). -1. Next you need to configure ".dlt/config.toml", which looks like: +1. Next, you need to configure ".dlt/config.toml", which looks like: ```toml [sources.airtable] - base_id = "Please set me up!" # The id of the base. + base_id = "Please set me up!" # The ID of the base. table_names = ["Table1","Table2"] # A list of table IDs or table names to load. ``` @@ -142,7 +142,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug ### Source `airtable_source` -This function retrieves tables from given Airtable base. +This function retrieves tables from a given Airtable base. ```py @dlt.source @@ -178,10 +178,11 @@ def airtable_resource( ## Customization + + ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods from this -verified source. +If you wish to create your own pipelines, you can leverage source and resource methods from this verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: @@ -196,7 +197,7 @@ verified source. 1. To load the entire base: ```py - base_id = "Please set me up!" # The id of the base. + base_id = "Please set me up!" # The ID of the base. airtables = airtable_source(base_id=base_id) load_info = pipeline.run(load_data, write_disposition="replace") @@ -205,8 +206,8 @@ verified source. 1. To load selected tables from a base table: ```py - base_id = "Please set me up!" # The id of the base. - table_names = ["Table1","Table2"] # A list of table IDs or table names to load. + base_id = "Please set me up!" # The ID of the base. + table_names = ["Table1", "Table2"] # A list of table IDs or table names to load. airtables = airtable_source( base_id = base_id, @@ -221,14 +222,14 @@ verified source. 1. To load data and apply hints to a specific column: ```py - base_id = "Please set me up!" # The id of the base. - table_names = ["Table1","Table2"] # A list of table IDs or table names to load. + base_id = "Please set me up!" # The ID of the base. + table_names = ["Table1", "Table2"] # A list of table IDs or table names to load. resource_name = "Please set me up!" # The table name we want to apply hints. field_name = "Please set me up!" # The table field name for which we want to apply hints. 
airtables = airtable_source( base_id="Please set me up!", - table_names=["Table1","Table2"], + table_names=["Table1", "Table2"], ) airtables.resources[resource_name].apply_hints( diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/asana.md b/docs/website/docs/dlt-ecosystem/verified-sources/asana.md index 173cc42b8a..67e52596b2 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/asana.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/asana.md @@ -20,18 +20,18 @@ Resources that can be loaded using this verified source are: | Name | Description | | ---------- | ----------------------------------------------------------------------------------------------------- | -| workspaces | people, materials, or assets required to complete a task or project successfully | -| projects | collections of tasks and related information | -| sections | used to organize tasks within a project into smaller groups or categories | -| tags | labels that can be attached to tasks, projects, or conversations to help categorize and organize them | -| stories | updates or comments that team members can add to a task or project | -| teams | groups of individuals who work together to complete projects and tasks | -| users | individuals who have access to the Asana platform | +| workspaces | People, materials, or assets required to complete a task or project successfully | +| projects | Collections of tasks and related information | +| sections | Used to organize tasks within a project into smaller groups or categories | +| tags | Labels that can be attached to tasks, projects, or conversations to help categorize and organize them | +| stories | Updates or comments that team members can add to a task or project | +| teams | Groups of individuals who work together to complete projects and tasks | +| users | Individuals who have access to the Asana platform | To get a complete list of sub-endpoints that can be loaded, see [asana_dlt/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/asana_dlt/settings.py) -## Setup Guide +## Setup guide ### Grab credentials @@ -161,12 +161,9 @@ workspace from the iterator obtained. This enables the workspaces to be consumed ### Resource-transformer `projects` -In addition to these source and resource functions, there are seven transformer functions. For -various endpoints like “projects”, “sections”, “tags”, “tasks”, “stories”, “teams” and “users”. The -transformer functions transform or process data from one or more resources. +In addition to these source and resource functions, there are seven transformer functions for various endpoints like "projects", "sections", "tags", "tasks", "stories", "teams", and "users". The transformer functions transform or process data from one or more resources. -The transformer function `projects` process data from the `workspaces` resource. It -fetches and returns a list of projects for a given workspace from Asana. +The transformer function `projects` processes data from the `workspaces` resource. It fetches and returns a list of projects for a given workspace from Asana. ```py @dlt.transformer( @@ -184,18 +181,15 @@ def projects( `workspace`: The data item from the 'workspaces' resource. -`access_token`: Token required to authenticate the Asana API. This token is defined in the -`.dlt/secret.toml` file. +`access_token`: Token required to authenticate the Asana API. This token is defined in the `.dlt/secret.toml` file. -`fields`: A list of workspace fields to be fetched from `asana_dlt/settings.py`. 
For example, -"name", "members", "completed", etc. +`fields`: A list of workspace fields to be fetched from `asana_dlt/settings.py`. For example, "name", "members", "completed", etc. -It uses `@dlt.defer` decorator to enable parallel run in thread pool. +It uses the `@dlt.defer` decorator to enable parallel run in a thread pool. ### Resource-transformer `tasks` -This [incremental](../../general-usage/incremental-loading.md) resource-transformer fetches all -tasks for a given project from Asana. +This [incremental](../../general-usage/incremental-loading.md) resource-transformer fetches all tasks for a given project from Asana. ```py @dlt.transformer(data_from=projects, write_disposition="merge", primary_key="gid") @@ -212,23 +206,19 @@ def tasks( `workspace`: The data item from the 'projects' resource. -`access_token`: Token required to authenticate the Asana API. This token is defined in the -`.dlt/secret.toml` file. +`access_token`: Token required to authenticate the Asana API. This token is defined in the `.dlt/secret.toml` file. `modified_at`: The date from which to fetch modified tasks. -`fields`: A list of workspace fields to be fetched from `asana_dlt/settings.py`. For example, -"name", "assignee", "completed", etc. +`fields`: A list of workspace fields to be fetched from `asana_dlt/settings.py`. For example, "name", "assignee", "completed", etc. ## Customization ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods from this -verified source. +If you wish to create your own pipelines, you can leverage source and resource methods from this verified source. -To create your data pipeline using single loading for “workspaces” and “projects” endpoints, follow -these steps: +To create your data pipeline using single loading for the "workspaces" and "projects" endpoints, follow these steps: 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: @@ -240,10 +230,9 @@ these steps: ) ``` - To read more about pipeline configuration, please refer to our - [documentation](../../general-usage/pipeline). + To read more about pipeline configuration, please refer to our [documentation](../../general-usage/pipeline). -1. To load the data from all the fields, you can utilise the `asana_source` method as follows: +1. To load the data from all the fields, you can utilize the `asana_source` method as follows: ```py load_data = asana_source() @@ -257,8 +246,7 @@ these steps: print(load_info) ``` -1. To use the method `pipeline.run()` to load custom endpoints “workspaces” and “projects”, the - above script may be modified as: +1. 
To use the method `pipeline.run()` to load custom endpoints "workspaces" and "projects", the above script may be modified as: ```py load_info = pipeline.run(load_data.with_resources("workspaces", "projects")) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/chess.md b/docs/website/docs/dlt-ecosystem/verified-sources/chess.md index 663dda7259..378eedaf62 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/chess.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/chess.md @@ -16,11 +16,11 @@ Resources that can be loaded using this verified source are: | Name | Description | | ---------------- | ---------------------------------------------------------------------- | -| players_profiles | retrives player profiles for a list of player usernames | -| players_archives | retrives url to game archives for specified players | -| players_games | retrives players games that happened between start_month and end_month | +| players_profiles | retrieves player profiles for a list of player usernames | +| players_archives | retrieves URL to game archives for specified players | +| players_games | retrieves players' games that happened between start_month and end_month | -## Setup Guide +## Setup guide ### Grab credentials @@ -93,7 +93,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug ### Source `source` This is a `dlt.source` function for the Chess.com API named "chess", which returns a sequence of -DltResource objects. That we'll discuss in subsequent sections as resources. +DltResource objects. We'll discuss these in subsequent sections as resources. ```py dlt.source(name="chess") @@ -129,13 +129,13 @@ def players_profiles(players: List[str]) -> Iterator[TDataItem]: yield _get_profile(username) ``` -`players`: Is a list of player usernames for which you want to fetch profile data. +`players`: This is a list of player usernames for which you want to fetch profile data. -It uses `@dlt.defer` decorator to enable parallel run in thread pool. +It uses the `@dlt.defer` decorator to enable parallel run in a thread pool. ### Resource `players_archives` -This is a `dlt.resource` function, which returns url to game archives for specified players. +This is a `dlt.resource` function, which returns a URL to game archives for specified players. ```py @dlt.resource(write_disposition="replace", selected=False) @@ -143,9 +143,9 @@ def players_archives(players: List[str]) -> Iterator[List[TDataItem]]: ... ``` -`players`: Is a list of player usernames for which you want to fetch archives. +`players`: This is a list of player usernames for which you want to fetch archives. -`selected=False`: Parameter means that this resource is not selected by default when the pipeline +`selected=False`: This parameter means that this resource is not selected by default when the pipeline runs. ### Resource `players_games` @@ -158,28 +158,29 @@ specified otherwise. def players_games( players: List[str], start_month: str = None, end_month: str = None ) -> Iterator[TDataItems]: - # gets a list of already checked(loaded) archives. + # gets a list of already checked (loaded) archives. checked_archives = dlt.current.resource_state().setdefault("archives", []) yield {} # return your retrieved data here ``` -`players`: Is a list of player usernames for which you want to fetch games. +`players`: This is a list of player usernames for which you want to fetch games. -List `checked_archives` is used to load new archives and skip the ones already loaded. 
It uses state +The list `checked_archives` is used to load new archives and skip the ones already loaded. It uses state to initialize a list called "checked_archives" from the current resource [state](../../general-usage/state). ### Resource `players_online_status` -The `players_online_status` is a `dlt.resource` function checks current online status of multiple chess players. It +The `players_online_status` is a `dlt.resource` function that checks the current online status of multiple chess players. It retrieves their username, status, last login date, and check time. ## Customization + + ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods from this -verified source. +If you wish to create your own pipelines, you can leverage source and resource methods from this verified source. To create your data loading pipeline for players and load data, follow these steps: @@ -193,10 +194,9 @@ To create your data loading pipeline for players and load data, follow these ste ) ``` - To read more about pipeline configuration, please refer to our - [documentation](../../general-usage/pipeline). + To read more about pipeline configuration, please refer to our [documentation](../../general-usage/pipeline). -1. To load the data from all the resources for specific players (e.g. for November), you can utilise the `source` method as follows: +1. To load the data from all the resources for specific players (e.g., for November), you can utilize the `source` method as follows: ```py # Loads games for Nov 2022 @@ -215,8 +215,7 @@ To create your data loading pipeline for players and load data, follow these ste print(info) ``` -1. To load data from specific resources like "players_games" and "player_profiles", modify the above - code as: +1. To load data from specific resources like "players_games" and "player_profiles", modify the above code as: ```py info = pipeline.run(data.with_resources("players_games", "players_profiles")) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md b/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md index c9b1ee5e34..e559922c6d 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md @@ -5,7 +5,7 @@ keywords: [facebook ads api, verified source, facebook ads] --- import Header from './_source-info-header.md'; -# Facebook Ads +# Facebook ads
@@ -14,23 +14,23 @@ Facebook and its affiliated apps like Instagram and Messenger. This Facebook `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads_pipeline.py) -loads data using [Facebook Marketing API](https://developers.facebook.com/products/marketing-api/) to the destination of your choice. +loads data using the [Facebook Marketing API](https://developers.facebook.com/products/marketing-api/) to the destination of your choice. The endpoints that this verified source supports are: | Name | Description | | ----------------- | ------------------------------------------------------------------------------ | -| campaigns | a structured marketing initiative that focuses on a specific objective or goal | -| ad_sets | a subset or group of ads within a campaign | -| ads | individual advertisement that is created and displayed within an ad set | -| creatives | visual and textual elements that make up an advertisement | -| ad_leads | information collected from users who have interacted with lead generation ads | -| facebook_insights | data on audience demographics, post reach, and engagement metrics | +| campaigns | A structured marketing initiative that focuses on a specific objective or goal | +| ad_sets | A subset or group of ads within a campaign | +| ads | An individual advertisement that is created and displayed within an ad set | +| creatives | Visual and textual elements that make up an advertisement | +| ad_leads | Information collected from users who have interacted with lead generation ads | +| facebook_insights | Data on audience demographics, post reach, and engagement metrics | To get a complete list of sub-endpoints that can be loaded, see [facebook_ads/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads/settings.py) -## Setup Guide +## Setup guide ### Grab credentials @@ -38,7 +38,7 @@ To get a complete list of sub-endpoints that can be loaded, see 1. Ensure that you have Ads Manager active for your Facebook account. 1. Find your account ID, which is a long number. You can locate it by clicking on the Account - Overview dropdown in Ads Manager or by checking the link address. For example + Overview dropdown in Ads Manager or by checking the link address. For example, https://adsmanager.facebook.com/adsmanager/manage/accounts?act=10150974068878324. 1. Note this account ID as it will further be used in configuring dlt. @@ -57,31 +57,25 @@ To get a complete list of sub-endpoints that can be loaded, see short-lived access token. 1. Copy the access token and update it in the `.dlt/secrets.toml` file. -#### Exchange _short-lived token_ for a _long-lived token_: +#### Exchange short-lived token for a long-lived token -By default, Facebook access tokens have a short lifespan of one hour. To exchange a short-lived -Facebook access token for a long-lived token, update the `.dlt/secrets.toml` with client_id, and -client_secret and execute the provided Python code. +By default, Facebook access tokens have a short lifespan of one hour. To exchange a short-lived Facebook access token for a long-lived token, update the `.dlt/secrets.toml` with client_id and client_secret, and execute the provided Python code. ```py from facebook_ads import get_long_lived_token print(get_long_lived_token("your short-lived token")) ``` -Replace the `access_token` in the `.dlt/secrets.toml` file with the long-lived token obtained from -the above code snippet. 
+Replace the `access_token` in the `.dlt/secrets.toml` file with the long-lived token obtained from the above code snippet. -To retrieve the expiry date and the associated scopes of the token, you can use the following -command: +To retrieve the expiry date and the associated scopes of the token, you can use the following command: ```py from facebook_ads import debug_access_token debug_access_token() ``` -We highly recommend you to add the token expiration timestamp to get notified a week before token -expiration that you need to rotate it. Right now the notifications are sent to logger with error -level. In `config.toml` / `secrets.toml`: +We highly recommend you add the token expiration timestamp to get notified a week before token expiration that you need to rotate it. Right now, the notifications are sent to the logger with error level. In `config.toml` / `secrets.toml`: ```toml [sources.facebook_ads] @@ -91,7 +85,6 @@ access_token_expires_at=1688821881 > Note: The Facebook UI, which is described here, might change. The full guide is available at [this link.](https://developers.facebook.com/docs/marketing-apis/overview/authentication) - ### Initialize the verified source To get started with your data pipeline, follow these steps: @@ -102,24 +95,17 @@ To get started with your data pipeline, follow these steps: dlt init facebook_ads duckdb ``` - [This command](../../reference/command-line-interface) will initialize - [the pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads_pipeline.py) - with Facebook Ads as the [source](../../general-usage/source) and - [duckdb](../destinations/duckdb.md) as the [destination](../destinations). + [This command](../../reference/command-line-interface) will initialize [the pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads_pipeline.py) with Facebook Ads as the [source](../../general-usage/source) and [duckdb](../destinations/duckdb.md) as the [destination](../destinations). -1. If you'd like to use a different destination, simply replace `duckdb` with the name of your - preferred [destination](../destinations). +1. If you'd like to use a different destination, simply replace `duckdb` with the name of your preferred [destination](../destinations). -1. After running this command, a new directory will be created with the necessary files and - configuration settings to get started. +1. After running this command, a new directory will be created with the necessary files and configuration settings to get started. For more information, read the guide on [how to add a verified source](../../walkthroughs/add-a-verified-source). ### Add credential -1. Inside the `.dlt` folder, you'll find a file called `secrets.toml`, which is where you can - securely store your access tokens and other sensitive information. It's important to handle this - file with care and keep it safe. Here's what the file looks like: +1. Inside the `.dlt` folder, you'll find a file called `secrets.toml`, which is where you can securely store your access tokens and other sensitive information. It's important to handle this file with care and keep it safe. Here's what the file looks like: ```toml # put your secret values and credentials here @@ -128,15 +114,11 @@ For more information, read the guide on [how to add a verified source](../../wal access_token="set me up!" ``` -1. 
Replace the access_token value with the [previously copied one](facebook_ads.md#grab-credentials) - to ensure secure access to your Facebook Ads resources. +1. Replace the access_token value with the [previously copied one](facebook_ads.md#grab-credentials) to ensure secure access to your Facebook Ads resources. -1. Next, Follow the [destination documentation](../../dlt-ecosystem/destinations) instructions to - add credentials for your chosen destination, ensuring proper routing of your data to the final - destination. +1. Next, follow the [destination documentation](../../dlt-ecosystem/destinations) instructions to add credentials for your chosen destination, ensuring proper routing of your data to the final destination. -1. It is strongly recommended to add the token expiration timestamp to your `config.toml` or - `secrets.toml` file. +1. It is strongly recommended to add the token expiration timestamp to your `config.toml` or `secrets.toml` file. 1. Next, store your pipeline configuration details in the `.dlt/config.toml`. @@ -158,16 +140,16 @@ For more information, read the [General Usage: Credentials.](../../general-usage ```sh pip install -r requirements.txt ``` -1. You're now ready to run the pipeline! To get started, run the following command: +2. You're now ready to run the pipeline! To get started, run the following command: ```sh python facebook_ads_pipeline.py ``` -1. Once the pipeline has finished running, you can verify that everything loaded correctly by using +3. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: ```sh dlt pipeline show ``` - For example, the `pipeline_name` for the above pipeline example is `facebook_ads`, you may also + For example, the `pipeline_name` for the above pipeline example is `facebook_ads`. You may also use any custom name instead. For more information, read the guide on [how to run a pipeline](../../walkthroughs/run-a-pipeline). @@ -181,12 +163,12 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug You can write your own pipelines to load data to a destination using this verified source. However, it is important to note the complete list of the default endpoints given in -[facebook_ads/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads_dlt/settings.py) +[facebook_ads/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads/settings.py) ### Source `facebook_ads_source` This function returns a list of resources to load campaigns, ad sets, ads, creatives, and ad leads -data from Facebook Marketing API. +data from the Facebook Marketing API. ```py @dlt.source(name="facebook_ads") @@ -200,18 +182,18 @@ def facebook_ads_source( ... ``` -`account_id`: Account id associated with add manager, configured in "config.toml". +`account_id`: Account ID associated with the ad manager, configured in "config.toml". `access_token`: Access token associated with the Business Facebook App, configured in "secrets.toml". -`chunk_size`: A size of the page and batch request. You may need to decrease it if you request a lot +`chunk_size`: The size of the page and batch request. You may need to decrease it if you request a lot of fields. Defaults to 50. `request_timeout`: Connection timeout. Defaults to 300.0. -`app_api_version`: A version of the facebook api required by the app for which the access tokens -were issued i.e. 'v17.0'. Defaults to the _facebook_business_ library default version. 
+`app_api_version`: A version of the Facebook API required by the app for which the access tokens +were issued, e.g., 'v17.0'. Defaults to the _facebook_business_ library default version. ### Resource `ads` @@ -228,7 +210,7 @@ def ads( yield get_data_chunked(account.get_ads, fields, states, chunk_size) ``` -`fields`: Retrives fields for each ad. For example, “id”, “name”, “adset_id” etc. +`fields`: Retrieves fields for each ad. For example, “id”, “name”, “adset_id”, etc. `states`: The possible states include "Active," "Paused," "Pending Review," "Disapproved," "Completed," and "Archived." @@ -240,15 +222,15 @@ Similar to resource `ads`, the following resources have been defined in the `__i | Resource | Description | | ------------ | -------------------------------------------------------------------- | -| campaigns | fetches all `DEFAULT_CAMPAIGN_FIELDS` | -| ad_sets | fetches all `DEFAULT_ADSET_FIELDS` | -| leads | fetches all `DEFAULT_LEAD_FIELDS`, uses `@dlt.transformer` decorator | -| ad_creatives | fetches all `DEFAULT_ADCREATIVE_FIELDS` | +| campaigns | Fetches all `DEFAULT_CAMPAIGN_FIELDS` | +| ad_sets | Fetches all `DEFAULT_ADSET_FIELDS` | +| leads | Fetches all `DEFAULT_LEAD_FIELDS`, uses `@dlt.transformer` decorator | +| ad_creatives | Fetches all `DEFAULT_ADCREATIVE_FIELDS` | The default fields are defined in [facebook_ads/settings.py](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads/settings.py) -### Source `facebook_insights_source`: +### Source `facebook_insights_source` This function returns a list of resources to load facebook_insights. @@ -272,47 +254,35 @@ def facebook_insights_source( ... ``` -`account_id`: Account id associated with ads manager, configured in _config.toml_. +`account_id`: Account ID associated with ads manager, configured in _config.toml_. -`access_token`: Access token associated with the Business Facebook App, configured in -_secrets.toml_. +`access_token`: Access token associated with the Business Facebook App, configured in _secrets.toml_. -`initial_load_past_days`: How many past days (starting from today) to initially load. Defaults to -30\. +`initial_load_past_days`: How many past days (starting from today) to initially load. Defaults to 30. -`fields`: A list of fields to include in each report. Note that the “breakdowns” option adds fields -automatically. Defaults to DEFAULT_INSIGHT_FIELDS. +`fields`: A list of fields to include in each report. Note that the “breakdowns” option adds fields automatically. Defaults to DEFAULT_INSIGHT_FIELDS. -`attribution_window_days_lag`: Attribution window in days. The reports in the attribution window are -refreshed on each run. Defaults to 7. +`attribution_window_days_lag`: Attribution window in days. The reports in the attribution window are refreshed on each run. Defaults to 7. -`time_increment_days`: The report aggregation window in days. use 7 for weekly aggregation. Defaults -to 1. +`time_increment_days`: The report aggregation window in days. Use 7 for weekly aggregation. Defaults to 1. -`breakdowns`: A presents with common aggregations. See -[settings.py](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads/settings.py) -for details. Defaults to "ads_insights_age_and_gender". +`breakdowns`: Presents with common aggregations. See [settings.py](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads/settings.py) for details. Defaults to "ads_insights_age_and_gender". -`action_breakdowns`: Action aggregation types. 
See -[settings.py](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads/settings.py) -for details. Defaults to ALL_ACTION_BREAKDOWNS. +`action_breakdowns`: Action aggregation types. See [settings.py](https://github.com/dlt-hub/verified-sources/blob/master/sources/facebook_ads/settings.py) for details. Defaults to ALL_ACTION_BREAKDOWNS. `level`: The granularity level. Defaults to "ad". -`action_attribution_windows`: Attribution windows for actions. Defaults to -ALL_ACTION_ATTRIBUTION_WINDOWS. +`action_attribution_windows`: Attribution windows for actions. Defaults to ALL_ACTION_ATTRIBUTION_WINDOWS. `batch_size`: Page size when reading data from a particular report. Defaults to 50. `request_timeout`: Connection timeout. Defaults to 300. -`app_api_version`: A version of the Facebook API required by the app for which the access tokens -were issued i.e. 'v17.0'. Defaults to the facebook_business library default version. +`app_api_version`: A version of the Facebook API required by the app for which the access tokens were issued, e.g., 'v17.0'. Defaults to the facebook_business library default version. ### Resource `facebook_insights` -This function fetches Facebook insights data incrementally from a specified start date until the -current date, in day steps. +This function fetches Facebook insights data incrementally from a specified start date until the current date, in day steps. ```py @dlt.resource(primary_key=INSIGHTS_PRIMARY_KEY, write_disposition="merge") @@ -324,16 +294,13 @@ def facebook_insights( ... ``` -`date_start`: Parameter sets the initial value for the "date_start" parameter in -dlt.sources.incremental. It is based on the last pipeline run or defaults to today's date minus the -specified number of days in the "initial_load_past_days" parameter. +`date_start`: Parameter sets the initial value for the "date_start" parameter in dlt.sources.incremental. It is based on the last pipeline run or defaults to today's date minus the specified number of days in the "initial_load_past_days" parameter. ## Customization ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods from this -verified source. +If you wish to create your own pipelines, you can leverage source and resource methods from this verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: @@ -345,10 +312,9 @@ verified source. ) ``` - To read more about pipeline configuration, please refer to our - [documentation](../../general-usage/pipeline). + To read more about pipeline configuration, please refer to our [documentation](../../general-usage/pipeline). -1. To load all the data from, campaigns, ad sets, ads, ad creatives and leads. +1. To load all the data from campaigns, ad sets, ads, ad creatives, and leads: ```py load_data = facebook_ads_source() @@ -356,15 +322,14 @@ verified source. print(load_info) ``` -1. To merge the Facebook Ads with the state “DISAPPROVED” and with ads state “PAUSED” you can do the - following: +1. To merge the Facebook Ads with the state "DISAPPROVED" and with ads state "PAUSED", you can do the following: ```py load_data = facebook_ads_source() - # It is recommended to enable root key propagation on a source that is not a merge one by default. this is not required if you always use merge but below we start with replace + # It is recommended to enable root key propagation on a source that is not a merge one by default. 
This is not required if you always use merge but below we start with replace load_data.root_key = True - # load only disapproved ads + # Load only disapproved ads load_data.ads.bind(states=("DISAPPROVED",)) load_info = pipeline.run(load_data.with_resources("ads"), write_disposition="replace") print(load_info) @@ -376,24 +341,19 @@ verified source. print(load_info) ``` - In the above steps, we first load the “ads” data with the “DISAPPROVED” state in _replace_ mode - and then merge the ads data with the “PAUSED” state on that. + In the above steps, we first load the "ads" data with the "DISAPPROVED" state in _replace_ mode and then merge the ads data with the "PAUSED" state on that. -1. To load data with a custom field, for example, to load only “id” from Facebook ads, you can do - the following: +1. To load data with a custom field, for example, to load only "id" from Facebook ads, you can do the following: ```py load_data = facebook_ads_source() - # Only loads add ids, works the same for campaigns, leads etc. + # Only loads ad ids, works the same for campaigns, leads, etc. load_data.ads.bind(fields=("id",)) load_info = pipeline.run(load_data.with_resources("ads")) print(load_info) ``` -1. This pipeline includes an enrichment transformation called `enrich_ad_objects` that you can apply - to any resource to obtain additional data per object using `object.get_api`. The following code - demonstrates how to enrich objects by adding an enrichment transformation that includes - additional fields. +1. This pipeline includes an enrichment transformation called `enrich_ad_objects` that you can apply to any resource to obtain additional data per object using `object.get_api`. The following code demonstrates how to enrich objects by adding an enrichment transformation that includes additional fields. ```py # You can reduce the chunk size for smaller requests @@ -414,34 +374,25 @@ verified source. print(load_info) ``` - In the above code, the "load_data" object represents the Facebook Ads source, and we specify the - desired chunk size for the requests. We then bind the "id" field for the "ad_creatives" resource - using the "bind()" method. + In the above code, the "load_data" object represents the Facebook Ads source, and we specify the desired chunk size for the requests. We then bind the "id" field for the "ad_creatives" resource using the "bind()" method. - To enrich the ad_creatives objects, we add a transformation using the "add_step()" method. The - "enrich_ad_objects" function is used to specify the AdCreative object type and request the fields - defined in _DEFAULT_ADCREATIVE_FIELDS_. + To enrich the ad_creatives objects, we add a transformation using the "add_step()" method. The "enrich_ad_objects" function is used to specify the AdCreative object type and request the fields defined in _DEFAULT_ADCREATIVE_FIELDS_. - Finally, we run the pipeline with the ad_creatives resource and store the load information in the - `load_info`. + Finally, we run the pipeline with the ad_creatives resource and store the load information in the `load_info`. -1. You can also load insights reports incrementally with defined granularity levels, fields, - breakdowns, etc. As defined in the `facebook_insights_source`. This function generates daily - reports for a specified number of past days. +1. You can also load insights reports incrementally with defined granularity levels, fields, breakdowns, etc., as defined in the `facebook_insights_source`. 
This function generates daily reports for a specified number of past days. ```py load_data = facebook_insights_source( initial_load_past_days=30, - attribution_window_days_lag= 7, + attribution_window_days_lag=7, time_increment_days=1 ) load_info = pipeline.run(load_data) print(load_info) ``` -> By default, daily reports are generated from `initial_load_past_days` ago to today. On subsequent -> runs, only new reports are loaded, with the past `attribution_window_days_lag` days (default is 7) -> being refreshed to accommodate any changes. You can adjust `time_increment_days` to change report -> frequency (default set to one). +> By default, daily reports are generated from `initial_load_past_days` ago to today. On subsequent runs, only new reports are loaded, with the past `attribution_window_days_lag` days (default is 7) being refreshed to accommodate any changes. You can adjust `time_increment_days` to change report frequency (default set to one). + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/freshdesk.md b/docs/website/docs/dlt-ecosystem/verified-sources/freshdesk.md index 8990af83cb..63c26de670 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/freshdesk.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/freshdesk.md @@ -23,14 +23,14 @@ Resources that can be loaded using this verified source are: | S.No. | Name | Description | | ----- | --------- | ----------------------------------------------------------------------------------------- | -| 1. | agents | Users responsible for managing and resolving customer inquiries and support tickets. | -| 2. | companies | Customer organizations or groups that agents support. | -| 3. | contacts | Individuals or customers who reach out for support. | -| 4. | groups | Agents organized based on specific criteria. | -| 5. | roles | Predefined sets of permissions that determine what actions an agent can perform. | -| 6. | tickets | Customer inquiries or issues submitted via various channels like email, chat, phone, etc. | +| 1. | agents | Users responsible for managing and resolving customer inquiries and support tickets. | +| 2. | companies | Customer organizations or groups that agents support. | +| 3. | contacts | Individuals or customers who reach out for support. | +| 4. | groups | Agents organized based on specific criteria. | +| 5. | roles | Predefined sets of permissions that determine what actions an agent can perform. | +| 6. | tickets | Customer inquiries or issues submitted via various channels like email, chat, phone, etc. | -## Setup Guide +## Setup guide ### Grab credentials @@ -76,8 +76,8 @@ For more information, read the guide on [how to add a verified source](../../wal # Put your secret values and credentials here # Github access token (must be classic for reactions source) [sources.freshdesk] - domain = "please set me up!" # Enter the freshdesk domain here - api_secret_key = "please set me up!" # Enter the freshdesk API key here + domain = "please set me up!" # Enter the Freshdesk domain here + api_secret_key = "please set me up!" # Enter the Freshdesk API key here ``` 1. In the `domain`, enter the domain of your Freshdesk account. @@ -90,17 +90,17 @@ For more information, read the guide on [how to add a verified source](../../wal ```sh pip install -r requirements.txt ``` -1. You're now ready to run the pipeline! To get started, run the following command: +2. You're now ready to run the pipeline! To get started, run the following command: ```sh python freshdesk_pipeline.py ``` -1. 
Once the pipeline has finished running, you can verify that everything loaded correctly by using +3. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: ```sh dlt pipeline show ``` For example, the `pipeline_name` for the above pipeline example is - `freshdesk_pipeline`, you may also use any custom name instead. + `freshdesk_pipeline`. You may also use any custom name instead. For more information, read the guide on [how to run a pipeline](../../walkthroughs/run-a-pipeline). @@ -111,7 +111,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug ### Source `freshdesk_source` -This function retrives the data from specified Freshdesk API endpoints. +This function retrieves the data from specified Freshdesk API endpoints. ```py @dlt.source() @@ -160,7 +160,7 @@ def freshdesk_source( `write_disposition`: Specifies the write disposition to load data. -`primary_key`: Specifies "id" as primary key of the resource. +`primary_key`: Specifies "id" as the primary key of the resource. ## Customization ### Create your own pipeline @@ -180,7 +180,7 @@ verified source. To read more about pipeline configuration, please refer to our [documentation](../../general-usage/pipeline). -1. To load data from all the endpoints, specified in ["settings.py".](https://github.com/dlt-hub/verified-sources/blob/master/sources/freshdesk/settings.py) +2. To load data from all the endpoints, specified in ["settings.py".](https://github.com/dlt-hub/verified-sources/blob/master/sources/freshdesk/settings.py) ```py load_data = freshdesk_source() # Run the pipeline @@ -189,7 +189,7 @@ verified source. print(load_info) ``` -1. To load the data from "agents", "contacts", and "tickets": +3. To load the data from "agents", "contacts", and "tickets": ```py load_data = freshdesk_source().with_resources("agents", "contacts", "tickets") # Run the pipeline diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/google_ads.md b/docs/website/docs/dlt-ecosystem/verified-sources/google_ads.md index ae6df133ef..5e8b247ffd 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/google_ads.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/google_ads.md @@ -5,20 +5,15 @@ keywords: [google ads api, google ads verified source, google ads] --- import Header from './_source-info-header.md'; -# Google Ads +# Google ads -[Google Ads](https://ads.google.com/home/) is a digital advertising service by Google that allows advertisers -to display ads across Google's search results, websites, and other platforms. +[Google Ads](https://ads.google.com/home/) is a digital advertising service by Google that allows advertisers to display ads across Google's search results, websites, and other platforms. :::warning Alert! -Please note that we are unable to conduct regular testing on the specified source due to difficulties -in obtaining the necessary credentials. We confirmed this source works at creation, and it is being used by the community. -We anticipate that the source should operate smoothly over time given Google's best pratices in versioning apis. +Please note that we are unable to conduct regular testing on the specified source due to difficulties in obtaining the necessary credentials. We confirmed this source works at creation, and it is being used by the community. We anticipate that the source should operate smoothly over time given Google's best practices in versioning APIs. 
::: -This Google Ads `dlt` verified source and -[pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/google_ads_pipeline.py) -loads data using the "Google Ads API" to the destination of your choice. +This Google Ads `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/google_ads_pipeline.py) loads data using the "Google Ads API" to the destination of your choice. Resources that can be loaded using this verified source are: @@ -29,42 +24,33 @@ Resources that can be loaded using this verified source are: | change_events | Modifications made to an account's ads, campaigns, and related settings | | customer_clients | Accounts that are managed by a given account | -## Setup Guide +## Setup guide ### Grab credentials -To access Google Ads verified sources, you'll need a developer token. For instructions on obtaining -one, you can search online or ask GPT. +To access Google Ads verified sources, you'll need a developer token. For instructions on obtaining one, you can search online or ask GPT. Next, there are two methods to get authenticated for using this verified source: - OAuth credentials - Service account credentials -Let's go over how to set up both OAuth tokens and service account credentials. In general, OAuth -tokens are preferred when user consent is required, while service account credentials are better -suited for server-to-server interactions. You can choose the method of authentication as per your -requirement. +Let's go over how to set up both OAuth tokens and service account credentials. In general, OAuth tokens are preferred when user consent is required, while service account credentials are better suited for server-to-server interactions. You can choose the method of authentication as per your requirement. ### Grab Google service account credentials -You need to create a GCP service account to get API credentials if you don't have one. To create -one, follow these steps: +You need to create a GCP service account to get API credentials if you don't have one. To create one, follow these steps: 1. Sign in to [console.cloud.google.com](http://console.cloud.google.com/). -1. [Create a service account](https://cloud.google.com/iam/docs/service-accounts-create#creating) if - needed. +1. [Create a service account](https://cloud.google.com/iam/docs/service-accounts-create#creating) if needed. -1. Enable the "Google Ads API". Refer to the - [Google documentation](https://support.google.com/googleapi/answer/6158841?hl=en) for - comprehensive instructions on this process. +1. Enable the "Google Ads API". Refer to the [Google documentation](https://support.google.com/googleapi/answer/6158841?hl=en) for comprehensive instructions on this process. 1. Generate credentials: 1. Navigate to IAM & Admin in the console's left panel, and then select Service Accounts. - 1. Identify the service account you intend to use, and click on the three-dot menu under the - "Actions" column next to it. + 1. Identify the service account you intend to use, and click on the three-dot menu under the "Actions" column next to it. 1. Create a new JSON key by selecting "Manage Keys" > "ADD KEY" > "CREATE". 1. You can download the ".json" file containing the necessary credentials for future use. @@ -110,7 +96,7 @@ python google_ads/setup_script_gcp_oauth.py Once you have executed the script and completed the authentication, you will receive a "refresh token" that can be used to set up the "secrets.toml". 
-### Share the Google Ads Account with the API: +### Share the Google Ads account with the API: :::note For service account authentication, use the client_email. For OAuth authentication, use the @@ -162,8 +148,8 @@ For more information, read the guide on [how to add a verified source](../../wal ### Add credentials 1. In the `.dlt` folder, there's a file called `secrets.toml`. It's where you store sensitive - information securely, like access tokens. Keep this file safe. In this file setup the "developer - token", "customer ID" and "impersonated_email" as follows: + information securely, like access tokens. Keep this file safe. In this file, set up the "developer + token", "customer ID", and "impersonated_email" as follows: ```toml [sources.google_ads] dev_token = "please set me up!" @@ -275,11 +261,11 @@ def customers( """ ``` -`client`: refers to a Google API Resource object used to interact with Google services. +`client`: Refers to a Google API Resource object used to interact with Google services. -`customer_id`: Individual identifier for google ads account. +`customer_id`: Individual identifier for a Google Ads account. -Similarly, there are resource functions called `campaigns`, `change_events` and `customer_clients` that populate +Similarly, there are resource functions called `campaigns`, `change_events`, and `customer_clients` that populate respective dimensions. ## Customization @@ -318,3 +304,4 @@ verified source. ``` + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md b/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md index 83077270c7..02c651a603 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md @@ -10,18 +10,18 @@ import Header from './_source-info-header.md';
HubSpot is a customer relationship management (CRM) software and inbound marketing platform that -helps businesses to attract visitors, engage customers, and close leads. +helps businesses attract visitors, engage customers, and close leads. -This Hubspot `dlt` verified source and +This HubSpot `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot_pipeline.py) -loads data using “Hubspot API” to the destination of your choice. +loads data using the “HubSpot API” to the destination of your choice. | Name | Description | | -------------------------- | ---------------------------------------------------------------------- | | contacts | visitors, potential customers, leads | | companies | information about organizations | | deals | deal records, deal tracking | -| tickets | request for help from customers or users | +| tickets | requests for help from customers or users | | products | pricing information of a product | | quotes | price proposals that salespeople can create and send to their contacts | | hubspot_events_for_objects | web analytics events for a given object type and object ids | @@ -29,12 +29,12 @@ loads data using “Hubspot API” to the destination of your choice. To get details about endpoints that can be loaded, see [hubspot/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot/settings.py) -## Setup Guide +## Setup guide ### Grab credentials > Note: As of November 30, 2022, HubSpot API Keys are being deprecated and are no longer supported. -Instead, we recommend to authenticate using a private app access token or OAuth access token. +Instead, we recommend authenticating using a private app access token or OAuth access token. Create a private app and get an authentication token before running the [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot_pipeline.py). @@ -61,7 +61,7 @@ Follow these steps: 1. Click "Show token" and store it for ".dlt/secrets.toml". -> Note: The Hubspot UI, which is described here, might change. +> Note: The HubSpot UI, which is described here, might change. The full guide is available at [this link.](https://knowledge.hubspot.com/integrations/how-do-i-get-my-hubspot-api-key) @@ -77,7 +77,7 @@ To get started with your data pipeline, follow these steps: [This command](../../reference/command-line-interface) will initialize [the pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot_pipeline.py) - with Hubspot as the [source](../../general-usage/source) and [duckdb](../destinations/duckdb.md) + with HubSpot as the [source](../../general-usage/source) and [duckdb](../destinations/duckdb.md) as the [destination](../destinations). 1. If you'd like to use a different destination, simply replace `duckdb` with the name of your @@ -90,19 +90,16 @@ For more information, read the guide on [how to add a verified source](../../wal ### Add credentials -1. Inside the `.dlt` folder, you'll find a file called `secrets.toml`, which is where you can - securely store your access tokens and other sensitive information. It's important to handle this - file with care and keep it safe. Here's what the file looks like: +1. Inside the `.dlt` folder, you'll find a file called `secrets.toml`, which is where you can securely store your access tokens and other sensitive information. It's important to handle this file with care and keep it safe. 
Here's what the file looks like: ```toml # put your secret values and credentials here - # do not share this file and do not push it to github + # do not share this file and do not push it to GitHub [sources.hubspot] api_key = "api_key" # please set me up! ``` -1. Replace the access_token value with the [previously copied one](hubspot.md#grab-credentials) to - ensure secure access to your Hubspot resources. +1. Replace the access_token value with the [previously copied one](hubspot.md#grab-credentials) to ensure secure access to your Hubspot resources. 1. Enter credentials for your chosen destination as per the [docs](../destinations/). @@ -110,8 +107,7 @@ For more information, read the [General Usage: Credentials.](../../general-usage ## Run the pipeline -1. Before running the pipeline, ensure that you have installed all the necessary dependencies by - running the command: +1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: ```sh pip install -r requirements.txt ``` @@ -119,31 +115,25 @@ For more information, read the [General Usage: Credentials.](../../general-usage ```sh python hubspot_pipeline.py ``` -1. Once the pipeline has finished running, you can verify that everything loaded correctly by using - the following command: +1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: ```sh dlt pipeline show ``` - For example, the `pipeline_name` for the above pipeline example is `hubspot_pipeline`, you may - also use any custom name instead. + For example, the `pipeline_name` for the above pipeline example is `hubspot_pipeline`, you may also use any custom name instead. For more information, read the guide on [how to run a pipeline](../../walkthroughs/run-a-pipeline). ## Sources and resources -`dlt` works on the principle of [sources](../../general-usage/source) and -[resources](../../general-usage/resource). +`dlt` works on the principle of [sources](../../general-usage/source) and [resources](../../general-usage/resource). ### Default endpoints -You can write your own pipelines to load data to a destination using this verified source. However, -it is important to note the complete list of the default endpoints given in -[hubspot/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot/settings.py) +You can write your own pipelines to load data to a destination using this verified source. However, it is important to note the complete list of the default endpoints given in [hubspot/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot/settings.py) ### Source `hubspot` -This function returns a list of resources to load companies, contacts, deals, tickets, products, and -web analytics events data into the destination. +This function returns a list of resources to load companies, contacts, deals, tickets, products, and web analytics events data into the destination. ```py @dlt.source(name="hubspot") @@ -156,13 +146,11 @@ def hubspot( `api_key`: The key used to authenticate with the HubSpot API. Configured in "secrets.toml". -`include_history`: This parameter, when set to "True", loads the history of property changes for the -specified entities. +`include_history`: This parameter, when set to "True", loads the history of property changes for the specified entities. 
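Before going through the individual resources below, here is a minimal usage sketch of this source. It assumes the verified source was scaffolded with `dlt init hubspot duckdb` and that `api_key` is already configured in `.dlt/secrets.toml`:

```py
import dlt

from hubspot import hubspot  # module created by `dlt init hubspot duckdb`

pipeline = dlt.pipeline(
    pipeline_name="hubspot_pipeline",
    destination="duckdb",
    dataset_name="hubspot_data",
)

# api_key is read from .dlt/secrets.toml; include_history=True additionally loads
# property change history as separate tables
load_info = pipeline.run(hubspot(include_history=True))
print(load_info)
```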
### Resource `companies` -This resource function fetches data from the "companies" endpoint and loads it to -the destination, replacing any existing data. +This resource function fetches data from the "companies" endpoint and loads it to the destination, replacing any existing data. ```py @dlt.resource(name="companies", write_disposition="replace") @@ -182,16 +170,11 @@ def companies( ) ``` -This resource function takes the same arguments, `api_key` and `include_history` as the "husbpot" -source described [above](hubspot.md#source-hubspot), but also supports two additional. -`include_custom_props` - indicates if all the properties of CRM objects, except Hubspot driven -(prefixed with `hs_`), are to be extracted. `props` - the list of properties to extract -in addition to the custom properties. Similar to this, resource functions "contacts", -"deals", "tickets", "products", and "quotes" retrieve data from the Hubspot API. +This resource function takes the same arguments, `api_key` and `include_history` as the "hubspot" source described [above](hubspot.md#source-hubspot), but also supports two additional parameters. `include_custom_props` - indicates if all the properties of CRM objects, except Hubspot driven (prefixed with `hs_`), are to be extracted. `props` - the list of properties to extract in addition to the custom properties. Similar to this, resource functions "contacts", "deals", "tickets", "products", and "quotes" retrieve data from the Hubspot API. ### Resource `hubspot_events_for_objects` -This function loads web analytics events for specific objects from Hubspot API into the destination. +This function loads web analytics events for specific objects from the Hubspot API into the destination. ```py @dlt.resource @@ -207,11 +190,11 @@ def hubspot_events_for_objects( `object_type`: One of the Hubspot object types as defined in [hubspot/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot/settings.py). -`object_ids`: List of object ids to track events. +`object_ids`: List of object IDs to track events. `api_key`: The key used to authenticate with the HubSpot API. Configured in "secrets.toml". -`start_date`: The initial date time from which start getting events, default to "01-01-2000", +`start_date`: The initial date time from which to start getting events, default to "01-01-2000", configured in [hubspot/settings.py.](https://github.com/dlt-hub/verified-sources/blob/master/sources/hubspot/settings.py). @@ -243,14 +226,14 @@ verified source. print(load_info) ``` -1. To load data from contacts and companies, with time history using "with_resources" method. +1. To load data from contacts and companies, with time history using the "with_resources" method. ```py load_data = hubspot(include_history=True).with_resources("companies","contacts") load_info = pipeline.run(load_data) print(load_info) ``` - 1. `include_history` loads property change history and entities as separate tables. By default set as False. + 1. `include_history` loads property change history and entities as separate tables. By default, it is set as False. 1. By default, all the custom properties of a CRM object are extracted. If you want only particular fields, set the flag `include_custom_props=False` and add a list of properties with the `props` arg. @@ -261,7 +244,7 @@ verified source. load_info = pipeline.run(load_data.with_resources("contacts")) ``` -1. If you want to read all the custom properties of CRM objects and some additional (e.g. Hubspot driven) properties. +1. 
If you want to read all the custom properties of CRM objects and some additional (e.g., Hubspot driven) properties. ```py load_data = hubspot() @@ -274,7 +257,7 @@ verified source. ```py resource = hubspot_events_for_objects("company", ["7086461639", "7086464459"]) - # Here, object type : company, and object ids : 7086461639 and 7086464459 + # Here, object type: company, and object IDs: 7086461639 and 7086464459 load_info = pipeline.run([resource]) print(load_info) ``` @@ -286,7 +269,7 @@ verified source. ### Additional info If you encounter the following error while processing your request: :::warning ERROR -Your request to HubSpot is too long to process. Maximum allowed query length is 2000 symbols, ... while your list is +Your request to HubSpot is too long to process. The maximum allowed query length is 2000 symbols, while your list is 2125 symbols long. ::: @@ -305,3 +288,4 @@ info = p.run(hubspot(include_custom_props=False)) Or, if you wish to include them, you can modify `settings.py`. + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/index.md b/docs/website/docs/dlt-ecosystem/verified-sources/index.md index a3d2ba00a7..d37b4393d4 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/index.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/index.md @@ -20,7 +20,7 @@ item => item.label === '30+ SQL Databases' || item.label === 'REST APIs' || item Choose from our collection of verified sources, developed and maintained by the `dlt` team and community. Each source is rigorously tested against a real API and provided as Python code for easy customization. :::tip -If you couldn't find a source implementation, you can easily create your own, check out the [resource page](../../general-usage/resource) to learn how! +If you couldn't find a source implementation, you can easily create your own. Check out the [resource page](../../general-usage/resource) to learn how! ::: item.label !== '30+ SQL Databases' && item.label !== 'REST APIs' && item The main difference between the [core sources](#core-sources) and [verified sources](#verified-sources) lies in their structure. Core sources are generic collections, meaning they can connect to a variety of systems. For example, the [SQL Database source](sql_database) can connect to any -database which supports SQLAlchemy. +database that supports SQLAlchemy. According to our telemetry, core sources are the most widely used among our users! @@ -45,4 +45,5 @@ your working directory. * Source missing? [Request a new verified source.](https://github.com/dlt-hub/verified-sources/issues/new?template=source-request.md) * Missing endpoint or a feature? [Request or contribute](https://github.com/dlt-hub/verified-sources/issues/new?template=extend-a-source.md) -* [Join our Slack community](https://dlthub.com/community) and ask in the technical-help channel. \ No newline at end of file +* [Join our Slack community](https://dlthub.com/community) and ask in the technical-help channel. + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md b/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md index 0a6ba8c632..9225797773 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md @@ -14,16 +14,16 @@ documents. This MongoDB `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/mongodb_pipeline.py) -loads data using "MongoDB" source to the destination of your choice. 
+loads data using the "MongoDB" source to the destination of your choice. Sources and resources that can be loaded using this verified source are: | Name | Description | |--------------------|--------------------------------------------| -| mongodb | loads a specific MongoDB database | -| mongodb_collection | loads a collection from a MongoDB database | +| mongodb | Loads a specific MongoDB database | +| mongodb_collection | Loads a collection from a MongoDB database | -## Setup Guide +## Setup guide ### Grab credentials @@ -42,22 +42,22 @@ Here are the typical ways to configure MongoDB and their connection URLs: | Name | Description | Connection URL Example | |---------------------|---------------------------------------------------------------------------------------|---------------------------------------------------| -| Local Installation | Install on Windows, macOS, Linux using official packages. | "mongodb://dbuser:passwd@host.or.ip:27017" | +| Local installation | Install on Windows, macOS, Linux using official packages. | "mongodb://dbuser:passwd@host.or.ip:27017" | | Docker | Deploy using the MongoDB Docker image. | "mongodb://dbuser:passwd@docker.host:27017" | | MongoDB Atlas | MongoDB’s managed service on AWS, Azure, and Google Cloud. | "mongodb+srv://dbuser:passwd@cluster.mongodb.net" | -| Managed Cloud | AWS DocumentDB, Azure Cosmos DB, and others offer MongoDB as a managed database. | "mongodb://dbuser:passwd@managed.cloud:27017" | -| Configuration Tools | Use Ansible, Chef, or Puppet for automation of setup and configuration. | "mongodb://dbuser:passwd@config.tool:27017" | -| Replica Set | Set up for high availability with data replication across multiple MongoDB instances. | "mongodb://dbuser:passwd@replica.set:27017" | -| Sharded Cluster | Scalable distribution of datasets across multiple MongoDB instances. | "mongodb://dbuser:passwd@shard.cluster:27017" | +| Managed cloud | AWS DocumentDB, Azure Cosmos DB, and others offer MongoDB as a managed database. | "mongodb://dbuser:passwd@managed.cloud:27017" | +| Configuration tools | Use Ansible, Chef, or Puppet for automation of setup and configuration. | "mongodb://dbuser:passwd@config.tool:27017" | +| Replica set | Set up for high availability with data replication across multiple MongoDB instances. | "mongodb://dbuser:passwd@replica.set:27017" | +| Sharded cluster | Scalable distribution of datasets across multiple MongoDB instances. | "mongodb://dbuser:passwd@shard.cluster:27017" | | Kubernetes | Deploy on Kubernetes using Helm charts or operators. | "mongodb://dbuser:passwd@k8s.cluster:27017" | -| Manual Tarball | Install directly from the official MongoDB tarball, typically on Linux. | "mongodb://dbuser:passwd@tarball.host:27017" | +| Manual tarball | Install directly from the official MongoDB tarball, typically on Linux. | "mongodb://dbuser:passwd@tarball.host:27017" | > Note: The provided URLs are example formats; adjust as needed for your specific setup. #### Grab `database and collections` -1. To grab "database and collections" you must have MongoDB shell installed. For installation - guidance, refer to [documentation here.](https://www.mongodb.com/docs/mongodb-shell/install/) +1. To grab "database and collections," you must have the MongoDB shell installed. For installation + guidance, refer to [the documentation here.](https://www.mongodb.com/docs/mongodb-shell/install/) 1. Modify the example URLs with your credentials (dbuser & passwd) and host details. 
@@ -67,19 +67,19 @@ Here are the typical ways to configure MongoDB and their connection URLs: mongo "mongodb://dbuser:passwd@your_host:27017" ``` -1. List all Databases: +1. List all databases: ```sh show dbs ``` -1. View Collections in a Database: +1. View collections in a database: - 1. Switch to Database: + 1. Switch to the database: ```sh use your_database_name ``` - 1. Display its Collections: + 1. Display its collections: ```sh show collections ``` @@ -90,7 +90,7 @@ Here are the typical ways to configure MongoDB and their connection URLs: exit ``` ->Note the database and collection names for future source configuration. +> Note the database and collection names for future source configuration. ### Prepare your data @@ -239,10 +239,10 @@ def mongodb_collection( ## Customization + ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods from this -verified source. +If you wish to create your own pipelines, you can leverage source and resource methods from this verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: @@ -262,7 +262,7 @@ verified source. print(load_info) ``` -1. To load a specific collections from the database: +1. To load specific collections from the database: ```py load_data = mongodb().with_resources("collection_1", "collection_2") @@ -274,12 +274,12 @@ verified source. ```py load_data = mongodb(incremental=dlt.sources.incremental("date")).with_resources("collection_1") - load_info = pipeline.run(load_data, write_disposition = "merge") + load_info = pipeline.run(load_data, write_disposition="merge") print(load_info) ``` - > Data is loaded incrementally based on "date" field. + > Data is loaded incrementally based on the "date" field. -1. To load data from a particular collection say "movies" incrementally: +1. To load data from a particular collection, say "movies," incrementally: ```py load_data = mongodb_collection( @@ -293,7 +293,7 @@ verified source. ``` > The source function "mongodb_collection" loads data from a particular single - > collection, where as source "mongodb" can load data from multiple collections. + > collection, whereas the source "mongodb" can load data from multiple collections. > This script configures incremental loading from the "movies" collection based on the > "lastupdated" field, starting from midnight on September 10, 2020. @@ -311,7 +311,7 @@ verified source. ``` - > It applies hint for incremental loading based on the "last_scraped" field, ideal for tables + > It applies a hint for incremental loading based on the "last_scraped" field, ideal for tables > with additions but no updates. 1. To load a selected collection and rename it in the destination: @@ -330,15 +330,16 @@ verified source. 1. 
To load a selected collection, using Apache Arrow for data conversion: ```py - # Load collection "movies", using Apache Arrow for converion + # Load collection "movies", using Apache Arrow for conversion movies = mongodb_collection( collection="movies", data_item_format="arrow", ) # Run the pipeline - info = pipeline.run(source) + info = pipeline.run(movies) print(info) ``` + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/mux.md b/docs/website/docs/dlt-ecosystem/verified-sources/mux.md index 37368110e4..2ae14de2dc 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/mux.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/mux.md @@ -14,7 +14,7 @@ import Header from './_source-info-header.md'; This Mux `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/mux_pipeline.py) -loads data using “Mux API” to the destination of your choice. +loads data using the “Mux API” to the destination of your choice. | Name | Description | @@ -24,7 +24,7 @@ loads data using “Mux API” to the destination of your choice. > Note: The source `mux_source` loads all video assets, but each video view is for yesterday only! -## Setup Guide +## Setup guide ### Grab credentials @@ -74,7 +74,7 @@ For more information, read the guide on [how to add a verified source.](../../wa Here's what the file looks like: ```toml - # Put your secret values and credentials here. Do not share this file and do not push it to github + # Put your secret values and credentials here. Do not share this file and do not push it to GitHub [sources.mux] mux_api_access_token = "please set me up" # Mux API access token mux_api_secret_key = "please set me up!" # Mux API secret key @@ -94,11 +94,11 @@ For more information, read the [General Usage: Credentials.](../../general-usage ```sh pip install -r requirements.txt ``` -1. You're now ready to run the pipeline! To get started, run the following command: +2. You're now ready to run the pipeline! To get started, run the following command: ```sh python mux_pipeline.py ``` -1. Once the pipeline has finished running, you can verify that everything loaded correctly by using +3. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: ```sh dlt pipeline show @@ -159,7 +159,7 @@ def views_resource( ... ``` -The arguments `mux_api_access_token`, `mux_api_secret_key` and `limit` are the same as described [above](#resource-assets_resource) in "asset_resource". +The arguments `mux_api_access_token`, `mux_api_secret_key`, and `limit` are the same as described [above](#resource-assets_resource) in "asset_resource". ## Customization @@ -178,21 +178,21 @@ verified source. ) ``` -1. To load metadata about every asset to be loaded: +2. To load metadata about every asset to be loaded: ```py load_info = pipeline.run(mux_source().with_resources("assets_resource")) print(load_info) ``` -1. To load data for each video view from yesterday: +3. To load data for each video view from yesterday: ```py load_info = pipeline.run(mux_source().with_resources("views_resource")) print(load_info) ``` -1. To load both metadata about assets and video views from yesterday: +4. To load both metadata about assets and video views from yesterday: ```py load_info = pipeline.run(mux_source()) @@ -200,3 +200,4 @@ verified source. 
``` + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/pg_replication.md b/docs/website/docs/dlt-ecosystem/verified-sources/pg_replication.md index 7934dd0067..d3ed47905f 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/pg_replication.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/pg_replication.md @@ -18,13 +18,13 @@ Resources that can be loaded using this verified source are: | replication_resource | Load published messages from a replication slot | :::info -The postgres replication source currently **does not** suppport the [scd2 merge strategy](../../general-usage/incremental-loading#scd2-strategy). +The Postgres replication source currently **does not** support the [scd2 merge strategy](../../general-usage/incremental-loading#scd2-strategy). ::: -## Setup Guide +## Setup guide ### Setup user -To setup a Postgres user follow these steps: +To set up a Postgres user, follow these steps: 1. The Postgres user needs to have the `LOGIN` and `REPLICATION` attributes assigned: @@ -40,17 +40,17 @@ To setup a Postgres user follow these steps: ### Set up RDS -To setup a Postgres user on RDS follow these steps: +To set up a Postgres user on RDS, follow these steps: -1. You must enable replication for RDS Postgres instance via [Parameter Group](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PostgreSQL.Replication.ReadReplicas.html) +1. You must enable replication for the RDS Postgres instance via [Parameter Group](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_PostgreSQL.Replication.ReadReplicas.html). -2. `WITH LOGIN REPLICATION;` does not work on RDS, instead do: +2. `WITH LOGIN REPLICATION;` does not work on RDS; instead, do: ```sql GRANT rds_replication TO replication_user; ``` -3. Do not fallback to non-SSL connection by setting connection parameters: +3. Do not fallback to a non-SSL connection by setting connection parameters: ```toml sources.pg_replication.credentials="postgresql://loader:password@host.rds.amazonaws.com:5432/dlt_data?sslmode=require&connect_timeout=300" @@ -70,13 +70,13 @@ To get started with your data pipeline, follow these steps: 2. If you'd like to use a different destination, simply replace `duckdb` with the name of your preferred [destination](../../dlt-ecosystem/destinations). -3. This source uses `sql_database` source, you can init it as follows: +3. This source uses the `sql_database` source; you can initialize it as follows: ```sh dlt init sql_database duckdb ``` :::note - It is important to note that It is now only required if a user performs an initial load, specifically when `persist_snapshots` is set to `True`. + It is important to note that it is now only required if a user performs an initial load, specifically when `persist_snapshots` is set to `True`. ::: 4. After running these two commands, a new directory will be created with the necessary files and configuration settings to get started. @@ -87,6 +87,7 @@ To get started with your data pipeline, follow these steps: You can omit the `[sql.sources.credentials]` section in `secrets.toml` as it is not required. ::: + ### Add credentials 1. In the `.dlt` folder, there's a file called `secrets.toml`. It's where you store sensitive information securely, like access tokens. Keep this file safe. @@ -162,9 +163,9 @@ def replication_resource( `pub_name`: Publication slot name to publish messages. -`include_columns`: Maps table name(s) to sequence of names of columns to include in the generated data items. Any column not in the sequence is excluded. 
If not provided, all columns are included +`include_columns`: Maps table name(s) to a sequence of names of columns to include in the generated data items. Any column not in the sequence is excluded. If not provided, all columns are included. -`columns`: Maps table name(s) to column hints to apply on the replicated table(s) +`columns`: Maps table name(s) to column hints to apply on the replicated table(s). `target_batch_size`: Desired number of data items yielded in a batch. Can be used to limit the data items in memory. @@ -256,7 +257,7 @@ If you wish to create your own pipelines, you can leverage source and resource m dest_pl.run(changes) ``` -8. To replicate tables with selected columns you can use the `include_columns` argument as follows: +8. To replicate tables with selected columns, you can use the `include_columns` argument as follows: ```py # requires the Postgres user to have the REPLICATION attribute assigned @@ -273,3 +274,4 @@ If you wish to create your own pipelines, you can leverage source and resource m ``` Similarly, to replicate changes from selected columns, you can use the `table_names` and `include_columns` arguments in the `replication_resource` function. + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md b/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md index 85216f3206..8864b3b629 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md @@ -14,36 +14,38 @@ and customer relationship management, encompassing sales, marketing, and custome This Salesforce `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/salesforce_pipeline.py) -loads data using “Salesforce API” to the destination of your choice. +loads data using the “Salesforce API” to the destination of your choice. The resources that this verified source supports are: | Name | Mode | Description | |----------------|---------|---------------------------------------------------------------------------------------------------| -| User | replace | refers to an individual who has access to a Salesforce org or instance | -| UserRole | replace | a standard object that represents a role within the organization's hierarchy | -| Lead | replace | prospective customer/individual/org. that has shown interest in a company's products/services | -| Contact | replace | an individual person associated with an account or organization | -| Campaign | replace | marketing initiative or project designed to achieve specific goals, such as generating leads etc. 
| -| Product2 | replace | for managing and organizing your product-related data within the Salesforce ecosystem | -| Pricebook2 | replace | used to manage product pricing and create price books | -| PricebookEntry | replace | an object that represents a specific price for a product in a price book | -| Opportunity | merge | represents a sales opportunity for a specific account or contact | -| OpportunityLineItem | merge | represents individual line items or products associated with an opportunity | -| OpportunityContactRole | merge | represents the association between an Opportunity and a contact | -| Account | merge | individual or organization that interacts with your business | -| CampaignMember | merge | association between a contact or lead and a campaign | -| Task | merge | used to track and manage various activities and tasks within the salesforce platform | -| Event | merge | used to track and manage calendar-based events, such as meetings, appointments calls, or any other time-specific activities | - -* Note that formula fields are included - these function like Views in salesforce and will not be back-updated when their definitions change in Salesforce! The recommended handling is to ignore these fields and reproduce yourself any calculations from the base data fields. - -## Setup Guide +| User | replace | Refers to an individual who has access to a Salesforce org or instance | +| UserRole | replace | A standard object that represents a role within the organization's hierarchy | +| Lead | replace | Prospective customer/individual/org. that has shown interest in a company's products/services | +| Contact | replace | An individual person associated with an account or organization | +| Campaign | replace | Marketing initiative or project designed to achieve specific goals, such as generating leads etc. | +| Product2 | replace | For managing and organizing your product-related data within the Salesforce ecosystem | +| Pricebook2 | replace | Used to manage product pricing and create price books | +| PricebookEntry | replace | An object that represents a specific price for a product in a price book | +| Opportunity | merge | Represents a sales opportunity for a specific account or contact | +| OpportunityLineItem | merge | Represents individual line items or products associated with an opportunity | +| OpportunityContactRole | merge | Represents the association between an Opportunity and a contact | +| Account | merge | Individual or organization that interacts with your business | +| CampaignMember | merge | Association between a contact or lead and a campaign | +| Task | merge | Used to track and manage various activities and tasks within the Salesforce platform | +| Event | merge | Used to track and manage calendar-based events, such as meetings, appointments, calls, or any other time-specific activities | + +* Note that formula fields are included - these function like views in Salesforce and will not be back-updated when their definitions change in Salesforce! The recommended handling is to ignore these fields and reproduce yourself any calculations from the base data fields. + +## Setup guide + + ### Grab credentials To set up your pipeline, you'll need your Salesforce `user_name`, `password`, and `security_token`. -Use your login credentials for user_name and password. +Use your login credentials for `user_name` and `password`. To obtain the `security_token`, follow these steps: @@ -101,9 +103,9 @@ For more information, read the guide on security_token = "please set me up!" 
# Salesforce security token ``` -1. In `secrets.toml`, replace username and password with your Salesforce credentials. +1. In `secrets.toml`, replace `user_name` and `password` with your Salesforce credentials. -1. Update the security_token value with the token you +1. Update the `security_token` value with the token you [copied earlier](salesforce.md#grab-credentials) for secure Salesforce access. 1. Next, follow the [destination documentation](../../dlt-ecosystem/destinations) instructions to @@ -112,7 +114,7 @@ For more information, read the guide on For more information, read the [General Usage: Credentials.](../../general-usage/credentials) -## Run the pipeline +### Run the pipeline 1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: @@ -141,7 +143,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug ### Source `salesforce_source`: This function returns a list of resources to load users, user_role, opportunity, -opportunity_line_item, account etc. data from Salesforce API. +opportunity_line_item, account, etc., data from the Salesforce API. ```py @dlt.source(name="salesforce") @@ -149,7 +151,7 @@ def salesforce_source( user_name: str = dlt.secrets.value, password: str = dlt.secrets.value, security_token: str = dlt.secrets.value, -) ->Iterable[DltResource]: +) -> Iterable[DltResource]: ... ``` @@ -175,7 +177,7 @@ destination. | user_role() | contact() | lead() | campaign() | product_2() | pricebook_2() | pricebook_entry() | |-------------|-----------|--------|------------|-------------|---------------|-------------------| -The described functions fetch records from endpoints based on their names, e.g. user_role() accesses +The described functions fetch records from endpoints based on their names, e.g., user_role() accesses the "user_role" endpoint. ### Resource `opportunity` (incremental loading): @@ -197,9 +199,9 @@ def opportunity( ``` `last_timestamp`: Argument that will receive [incremental](../../general-usage/incremental-loading) -state, initialized with "initial_value". It is configured to track "SystemModstamp" field in data -item returned by "get_records" and then yielded. It will store the newest "SystemModstamp" value in -dlt state and make it available in "last_timestamp.last_value" on next pipeline run. +state, initialized with "initial_value". It is configured to track the "SystemModstamp" field in data +items returned by "get_records" and then yielded. It will store the newest "SystemModstamp" value in +dlt state and make it available in "last_timestamp.last_value" on the next pipeline run. Besides "opportunity", there are several resources that use replace mode for data writing to the destination. @@ -214,11 +216,9 @@ opportunity_line_item() accesses the "opportunity_line_item" endpoint. ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods as discussed -above. +If you wish to create your own pipelines, you can leverage source and resource methods as discussed above. -To create your data pipeline using single loading and -[incremental data loading](../../general-usage/incremental-loading), follow these steps: +To create your data pipeline using single loading and [incremental data loading](../../general-usage/incremental-loading), follow these steps: 1. 
Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: @@ -230,8 +230,7 @@ To create your data pipeline using single loading and ) ``` - To read more about pipeline configuration, please refer to our - [documentation](../../general-usage/pipeline). + To read more about pipeline configuration, please refer to our [documentation](../../general-usage/pipeline). 1. To load data from all the endpoints, use the `salesforce_source` method as follows: @@ -243,8 +242,7 @@ To create your data pipeline using single loading and print(load_info) ``` - > A hint ensures that the id column is void of null values. During data loading, dlt will verify - > that the source's id column doesn't contain nulls. + > A hint ensures that the id column is void of null values. During data loading, dlt will verify that the source's id column doesn't contain nulls. 1. To use the method `pipeline.run()` to load custom endpoints “candidates” and “members”: @@ -254,16 +252,9 @@ To create your data pipeline using single loading and print(load_info) ``` - In the initial run, the "opportunity" and "contact" endpoints load all data using 'merge' mode - and 'last_timestamp' set to "None". In subsequent runs, only data after - 'last_timestamp.last_value' (from the previous run) is merged. Incremental loading is specific to - endpoints in merge mode with the “dlt.sources.incremental” parameter. + In the initial run, the "opportunity" and "contact" endpoints load all data using 'merge' mode and 'last_timestamp' set to "None". In subsequent runs, only data after 'last_timestamp.last_value' (from the previous run) is merged. Incremental loading is specific to endpoints in merge mode with the “dlt.sources.incremental” parameter. - > For incremental loading of endpoints, maintain the pipeline name and destination dataset name. - > The pipeline name is important for accessing the [state](../../general-usage/state) from the - > last run, including the end date for incremental data loads. Altering these names could trigger - > a [“dev-mode”](../../general-usage/pipeline#do-experiments-with-dev-mode), disrupting - > the metadata tracking for [incremental data loading](../../general-usage/incremental-loading). + > For incremental loading of endpoints, maintain the pipeline name and destination dataset name. The pipeline name is important for accessing the [state](../../general-usage/state) from the last run, including the end date for incremental data loads. Altering these names could trigger a [“dev-mode”](../../general-usage/pipeline#do-experiments-with-dev-mode), disrupting the metadata tracking for [incremental data loading](../../general-usage/incremental-loading). 1. To load data from the “contact” in replace mode and “task” incrementally merge mode endpoints: @@ -273,23 +264,13 @@ To create your data pipeline using single loading and print(load_info) ``` - > Note: In the referenced pipeline, the "contact" parameter is always loaded in "replace" mode, - > overwriting existing data. Conversely, the "task" endpoint supports "merge" mode for - > incremental loads, updating or adding data based on the 'last_timestamp' value without erasing - > previously loaded data. + > Note: In the referenced pipeline, the "contact" parameter is always loaded in "replace" mode, overwriting existing data. Conversely, the "task" endpoint supports "merge" mode for incremental loads, updating or adding data based on the 'last_timestamp' value without erasing previously loaded data. -1. 
Salesforce enforces specific limits on API data requests. These limits - vary based on the Salesforce edition and license type, as outlined in the [Salesforce API Request Limits documentation](https://developer.salesforce.com/docs/atlas.en-us.salesforce_app_limits_cheatsheet.meta/salesforce_app_limits_cheatsheet/salesforce_app_limits_platform_api.htm). +1. Salesforce enforces specific limits on API data requests. These limits vary based on the Salesforce edition and license type, as outlined in the [Salesforce API Request Limits documentation](https://developer.salesforce.com/docs/atlas.en-us.salesforce_app_limits_cheatsheet.meta/salesforce_app_limits_cheatsheet/salesforce_app_limits_platform_api.htm). - To limit the number of Salesforce API data requests, developers can control the environment for production or - development purposes. For development, you can set the `IS_PRODUCTION` variable - to `False` in "[salesforce/settings.py](https://github.com/dlt-hub/verified-sources/blob/master/sources/salesforce/settings.py)", - which limits API call requests to 100. To modify this limit, you can update the query limit in - "[salesforce/helpers.py](https://github.com/dlt-hub/verified-sources/blob/756edaa00f56234cd06699178098f44c16d6d597/sources/salesforce/helpers.py#L56)" - as required. + To limit the number of Salesforce API data requests, developers can control the environment for production or development purposes. For development, you can set the `IS_PRODUCTION` variable to `False` in "[salesforce/settings.py](https://github.com/dlt-hub/verified-sources/blob/master/sources/salesforce/settings.py)", which limits API call requests to 100. To modify this limit, you can update the query limit in "[salesforce/helpers.py](https://github.com/dlt-hub/verified-sources/blob/756edaa00f56234cd06699178098f44c16d6d597/sources/salesforce/helpers.py#L56)" as required. - >To read more about Salesforce query limits, please refer to their official - >[documentation here](https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql_select_limit.htm). + > To read more about Salesforce query limits, please refer to their official [documentation here](https://developer.salesforce.com/docs/atlas.en-us.soql_sosl.meta/soql_sosl/sforce_api_calls_soql_select_limit.htm). diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md b/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md index ae526668f2..fe11491bd6 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md @@ -16,7 +16,7 @@ referrals. This Shopify `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/shopify_dlt_pipeline.py) -loads data using 'Shopify API' or 'Shopify Partner API' to the destination of your choice. +loads data using the 'Shopify API' or 'Shopify Partner API' to the destination of your choice. 
The resources that this verified source supports are: @@ -25,14 +25,14 @@ The resources that this verified source supports are: | customers | Individuals or entities who have created accounts on a Shopify-powered online store | | orders | Transactions made by customers on an online store | | products | The individual items or goods that are available for sale | -| shopify_partner_query | To query data using GraphQL queries from Shopify partner API | +| shopify_partner_query | To query data using GraphQL queries from the Shopify partner API | -## Setup Guide +## Setup guide ### Grab credentials #### Grab Admin API access token -To load data using Shopify API, you need an Admin API access token. This token can be obtained by following +To load data using the Shopify API, you need an Admin API access token. This token can be obtained by following these steps: 1. Log in to Shopify. @@ -44,10 +44,10 @@ these steps: 1. Grant read access in “Admin API access scopes.” 1. Save the configuration. 1. Hit “Install app” and confirm. -1. Reveal and copy the Admin API token. Store safely; it's shown only once. +1. Reveal and copy the Admin API token. Store it safely; it's shown only once. #### Grab Partner API access token -To load data using Shopify Partner API, you need an Partner API access token. This token can be obtained by following +To load data using the Shopify Partner API, you need a Partner API access token. This token can be obtained by following these steps: 1. Log in to Shopify Partners and click the settings icon⚙️ at the bottom left. @@ -55,7 +55,7 @@ these steps: 1. Create an API client with a suitable name and assign necessary permissions. 1. Save and create the API client, then click to show and copy the access token securely. -> Note: The Shopify and Shopify Partner UI, described here might change. +> Note: The Shopify and Shopify Partner UI, described here, might change. The full guide is available at [this link.](https://www.shopify.com/partners/blog/17056443-how-to-generate-a-shopify-api-token) ### Initialize the verified source @@ -83,9 +83,7 @@ For more information, read the guide on [how to add a verified source](../../wal ### Add credential -1. Inside the `.dlt` folder, you'll find a file called `secrets.toml`, which is where you can - securely store your access tokens and other sensitive information. It's important to handle this - file with care and keep it safe. +1. Inside the `.dlt` folder, you'll find a file called `secrets.toml`, which is where you can securely store your access tokens and other sensitive information. It's important to handle this file with care and keep it safe. Here's what the file looks like: @@ -93,14 +91,14 @@ For more information, read the guide on [how to add a verified source](../../wal #shopify [sources.shopify_dlt] private_app_password="Please set me up!" #Admin API access token - access_token=" Please set me up!" #Partner API acess token + access_token="Please set me up!" #Partner API access token ``` 1. Update `private_app_password` with the "Admin API access token". 1. Similarly, update the `access_token` with the "Partner API access token". - >To load data using Shopify API, update the `private_app_password`. - >To load data using Shopify partner API, update the `access_token`. + >To load data using the Shopify API, update the `private_app_password`. + >To load data using the Shopify partner API, update the `access_token`. 1. Next, store your pipeline configuration details in the `.dlt/config.toml`. 
@@ -108,26 +106,21 @@ For more information, read the guide on [how to add a verified source](../../wal ```toml [sources.shopify_dlt] - shop_url = "Please set me up !" + shop_url = "Please set me up!" organization_id = "Please set me up!" ``` -1. Update `shop_url` with the URL of your Shopify store. For example, - "https://shop-123.myshopify.com/%E2%80%9D". +1. Update `shop_url` with the URL of your Shopify store. For example, "https://shop-123.myshopify.com/". -1. Update `organization_id` with a code from your Shopify partner URL. For example in - "https://partners.shopify.com/1234567", the code '1234567' is the organization ID. +1. Update `organization_id` with a code from your Shopify partner URL. For example, in "https://partners.shopify.com/1234567", the code '1234567' is the organization ID. -1. Next, follow the [destination documentation](../../dlt-ecosystem/destinations) instructions to - add credentials for your chosen destination, ensuring proper routing of your data to the final - destination. +1. Next, follow the [destination documentation](../../dlt-ecosystem/destinations) instructions to add credentials for your chosen destination, ensuring proper routing of your data to the final destination. For more information, read the [General Usage: Credentials.](../../general-usage/credentials) ## Run the pipeline -1. Before running the pipeline, ensure that you have installed all the necessary dependencies by - running the command: +1. Before running the pipeline, ensure that you have installed all the necessary dependencies by running the command: ```sh pip install -r requirements.txt ``` @@ -135,25 +128,21 @@ For more information, read the [General Usage: Credentials.](../../general-usage ```sh python shopify_dlt_pipeline.py ``` -1. Once the pipeline has finished running, you can verify that everything loaded correctly by using - the following command: +1. Once the pipeline has finished running, you can verify that everything loaded correctly by using the following command: ```sh dlt pipeline show ``` - For example, the `pipeline_name` for the above pipeline example is `shopify_data`, you may also - use any custom name instead. + For example, the `pipeline_name` for the above pipeline example is `shopify_data`, you may also use any custom name instead. For more information, read the guide on [how to run a pipeline](../../walkthroughs/run-a-pipeline). ## Sources and resources -`dlt` works on the principle of [sources](../../general-usage/source) and -[resources](../../general-usage/resource). +`dlt` works on the principle of [sources](../../general-usage/source) and [resources](../../general-usage/resource). ### Source `shopify_source`: -This function returns a list of resources to load products, orders, and customers data from Shopify -API. +This function returns a list of resources to load products, orders, and customers data from the Shopify API. ```py def shopify_source( @@ -177,11 +166,9 @@ def shopify_source( `items_per_page`: Max items fetched per page (Default: 250). -`start_date`: Imports items updated since this date (Default: 2000-01-01). Used for incremental -loading if end_time isn't specified. Accepts ISO 8601 date/datetime formats. +`start_date`: Imports items updated since this date (Default: 2000-01-01). Used for incremental loading if end_time isn't specified. Accepts ISO 8601 date/datetime formats. -`end_time`: Data load range end time. Paired with start_date for specified time range. Enables -incremental loading if unspecified. 
+`end_time`: Data load range end time. Paired with start_date for specified time range. Enables incremental loading if unspecified. `created_at_min`: Load items created since this date (Default: 2000-01-01). @@ -189,8 +176,7 @@ incremental loading if unspecified. ### Resource `products`: -This resource loads products from your Shopify shop into the destination. It supports incremental -loading and pagination. +This resource loads products from your Shopify shop into the destination. It supports incremental loading and pagination. ```py @dlt.resource(primary_key="id", write_disposition="merge") @@ -211,8 +197,7 @@ def products( `updated_at`: The saved [state](../../general-usage/state) of the last 'updated_at' value. -Similar to the mentioned resource, there are two more resources "orders" and "customers", both -support incremental loading and pagination. +Similar to the mentioned resource, there are two more resources "orders" and "customers", both support incremental loading and pagination. ### Resource `shopify_partner_query`: This resource can be used to run custom GraphQL queries to load paginated data. @@ -236,7 +221,7 @@ def shopify_partner_query( `data_items_path`: JSONPath to array items in query results. -`pagination_cursor_path`: The JSONPath to the pagination cursor in the query result, will be piped to the next query via variables. +`pagination_cursor_path`: The JSONPath to the pagination cursor in the query result, which will be piped to the next query via variables. `pagination_variable_name`: The name of the variable to pass the pagination cursor to. @@ -246,14 +231,15 @@ def shopify_partner_query( `organization_id`: Your Organization ID, found in the Partner Dashboard. -`api_version`: The API version to use (e.g. 2024-01). Use `unstable` for the latest version. +`api_version`: The API version to use (e.g., 2024-01). Use `unstable` for the latest version. ## Customization + + ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods from this -verified source. +If you wish to create your own pipelines, you can leverage source and resource methods from this verified source. 1. Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: @@ -265,10 +251,9 @@ verified source. ) ``` - To read more about pipeline configuration, please refer to our - [documentation](../../general-usage/pipeline). + To read more about pipeline configuration, please refer to our [documentation](../../general-usage/pipeline). -1. To load data from "products", "orders" and "customers" from 1st Jan 2023. +1. To load data from "products", "orders", and "customers" from January 1, 2023: ```py # Add your desired resources to the list... @@ -280,9 +265,7 @@ verified source. print(load_info) ``` -1. To load past Shopify orders in weekly chunks using start_date and end_date parameters. This - minimizes potential failure during large data loads. Running chunks and incremental loads in - parallel accelerates the initial load. +1. To load past Shopify orders in weekly chunks using start_date and end_date parameters. This minimizes potential failure during large data loads. Running chunks and incremental loads in parallel accelerates the initial load. ```py # Load all orders from 2023-01-01 to now @@ -315,7 +298,7 @@ verified source. ) print(load_info) ``` -1. To load the first 10 transactions via GraphQL query from the Shopify Partner API. +1. 
To load the first 10 transactions via a GraphQL query from the Shopify Partner API. ```py # Construct query to load transactions 100 per page, the `$after` variable is used to paginate query = """query Transactions($after: String) { @@ -330,7 +313,7 @@ verified source. } """ - # Configure the resource with the query and json paths to extract the data and pagination cursor + # Configure the resource with the query and JSON paths to extract the data and pagination cursor resource = shopify_partner_query( query, # JSON path pointing to the data item in the results @@ -346,3 +329,4 @@ verified source. ``` + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md index a8146c75fe..22f9c23a06 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/index.md @@ -5,12 +5,11 @@ keywords: [sql connector, sql database pipeline, sql database] --- import Header from '../_source-info-header.md'; -# 30+ SQL Databases +# 30+ SQL databases
-SQL databases are management systems (DBMS) that store data in a structured format, commonly used -for efficient and reliable data retrieval. +SQL databases are management systems (DBMS) that store data in a structured format, commonly used for efficient and reliable data retrieval. The SQL Database verified source loads data to your specified destination using one of the following backends: SQLAlchemy, PyArrow, pandas, or ConnectorX. @@ -18,7 +17,7 @@ Sources and resources that can be loaded using this verified source are: | Name | Description | | ------------ | -------------------------------------------------------------------- | -| sql_database | Reflects the tables and views in SQL database and retrieves the data | +| sql_database | Reflects the tables and views in an SQL database and retrieves the data | | sql_table | Retrieves data from a particular SQL database table | | | | @@ -47,5 +46,6 @@ We support all [SQLAlchemy dialects](https://docs.sqlalchemy.org/en/20/dialects/ * Teradata Vantage :::note -Note that there many unofficial dialects, such as [DuckDB](https://duckdb.org/). -::: \ No newline at end of file +Note that there are many unofficial dialects, such as [DuckDB](https://duckdb.org/). +::: + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/setup.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/setup.md index a91ae40028..5af23570bb 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/setup.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/setup.md @@ -10,9 +10,9 @@ import Header from '../_source-info-header.md';
-To connect to your SQL database using `dlt` follow these steps: +To connect to your SQL database using `dlt`, follow these steps: -1. Initialize a `dlt` project in the current working directory by running the following command: +1. Initialize a `dlt` project in the current working directory by running the following command: ```sh dlt init sql_database duckdb @@ -43,11 +43,11 @@ If you'd like to use a different destination, simply replace `duckdb` with the n credentials="mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam" ``` - To learn more about how to add credentials into your `sql_database` pipeline see [here](./configuration#configuring-the-connection). + To learn more about how to add credentials into your `sql_database` pipeline, see [here](./configuration#configuring-the-connection). 3. Add credentials for your destination (if necessary) - Depending on which [destination](../../destinations) you're loading into, you might also need to add your destination credentials. For more information read the [General Usage: Credentials.](../../../general-usage/credentials) + Depending on which [destination](../../destinations) you're loading into, you might also need to add your destination credentials. For more information, read the [General Usage: Credentials.](../../../general-usage/credentials) 4. Install any necessary dependencies @@ -61,7 +61,7 @@ If you'd like to use a different destination, simply replace `duckdb` with the n python sql_database_pipeline.py ``` - Executing this command will run the example script `sql_database_pipeline.py` created in step 1. In order for this to run successfully you will need to pass the names of the databases and/or tables you wish to load. + Executing this command will run the example script `sql_database_pipeline.py` created in step 1. In order for this to run successfully, you will need to pass the names of the databases and/or tables you wish to load. See the [section on configuring the sql_database source](./configuration#configuring-the-sql-database-source) for more details. @@ -73,4 +73,5 @@ If you'd like to use a different destination, simply replace `duckdb` with the n :::note The pipeline_name for the above example is `rfam`, you may also use any custom name instead. - ::: \ No newline at end of file + ::: + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/troubleshooting.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/troubleshooting.md index 33986fb5a6..d0930716d8 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/troubleshooting.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/troubleshooting.md @@ -15,7 +15,7 @@ import Header from '../_source-info-header.md'; #### Connecting to MySQL with SSL Here, we use the `mysql` and `pymysql` dialects to set up an SSL connection to a server, with all information taken from the [SQLAlchemy docs](https://docs.sqlalchemy.org/en/14/dialects/mysql.html#ssl-connections). -1. To enforce SSL on the client without a client certificate you may pass the following DSN: +1. To enforce SSL on the client without a client certificate, you may pass the following DSN: ```toml sources.sql_database.credentials="mysql+pymysql://root:@:3306/mysql?ssl_ca=" @@ -38,22 +38,22 @@ Here, we use the `mysql` and `pymysql` dialects to set up an SSL connection to a **To connect to an `mssql` server using Windows authentication**, include `trusted_connection=yes` in the connection string. 
```toml
-sources.sql_database.credentials="mssql+pyodbc://loader.database.windows.net/dlt_data?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server"
+sources.sql_database.credentials="mssql+pyodbc://loader.database.windows.net/dlt_data?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server"
```
 
-**To connect to a local sql server instance running without SSL** pass `encrypt=no` parameter:
+**To connect to a local SQL server instance running without SSL**, pass the `encrypt=no` parameter:
```toml
-sources.sql_database.credentials="mssql+pyodbc://loader:loader@localhost/dlt_data?encrypt=no&driver=ODBC+Driver+17+for+SQL+Server"
+sources.sql_database.credentials="mssql+pyodbc://loader:loader@localhost/dlt_data?encrypt=no&driver=ODBC+Driver+17+for+SQL+Server"
```
 
-**To allow self signed SSL certificate** when you are getting `certificate verify failed:unable to get local issuer certificate`:
+**To allow a self-signed SSL certificate** when you are getting `certificate verify failed: unable to get local issuer certificate`:
```toml
-sources.sql_database.credentials="mssql+pyodbc://loader:loader@localhost/dlt_data?TrustServerCertificate=yes&driver=ODBC+Driver+17+for+SQL+Server"
+sources.sql_database.credentials="mssql+pyodbc://loader:loader@localhost/dlt_data?TrustServerCertificate=yes&driver=ODBC+Driver+17+for+SQL+Server"
```
 
 **To use long strings (>8k) and avoid collation errors**:
```toml
-sources.sql_database.credentials="mssql+pyodbc://loader:loader@localhost/dlt_data?LongAsMax=yes&driver=ODBC+Driver+17+for+SQL+Server"
+sources.sql_database.credentials="mssql+pyodbc://loader:loader@localhost/dlt_data?LongAsMax=yes&driver=ODBC+Driver+17+for+SQL+Server"
```
 
 ## Troubleshooting backends
 
 ### Notes on specific databases
 
 #### Oracle
-1. When using the `oracledb` dialect in thin mode we are getting protocol errors. Use thick mode or `cx_oracle` (old) client.
+1. When using the `oracledb` dialect in thin mode, we are getting protocol errors. Use thick mode or the `cx_oracle` (old) client.
 2. Mind that `SQLAlchemy` translates Oracle identifiers into lower case! Keep the default `dlt` naming convention (`snake_case`) when loading data. We'll support more naming conventions soon.
 3. `Connectorx` is for some reason slower for Oracle than the `PyArrow` backend.
 
See [here](https://github.com/dlt-hub/sql_database_benchmarking/tree/main/oracle#installing-and-setting-up-oracle) for information and code on setting up and benchmarking on oracle.
 
 #### DB2
 1. Mind that `SQLAlchemy` translates DB2 identifiers into lower case! Keep the default `dlt` naming convention (`snake_case`) when loading data. We'll support more naming conventions soon.
-2. The DB2 type `DOUBLE` gets incorrectly mapped to the python type `float` (instead of the `SqlAlchemy` type `Numeric` with default precision). This requires `dlt` to perform additional casts. The cost of the cast, however, is minuscule compared to the cost of reading rows from database.
+2. The DB2 type `DOUBLE` gets incorrectly mapped to the Python type `float` (instead of the `SQLAlchemy` type `Numeric` with default precision). This requires `dlt` to perform additional casts. The cost of the cast, however, is minuscule compared to the cost of reading rows from the database.
 
-See [here](https://github.com/dlt-hub/sql_database_benchmarking/tree/main/db2#installing-and-setting-up-db2) for information and code on setting up and benchmarking on db2. 
+See [here](https://github.com/dlt-hub/sql_database_benchmarking/tree/main/db2#installing-and-setting-up-db2) for information and code on setting up and benchmarking on DB2. #### MySQL -1. The `SqlAlchemy` dialect converts doubles to decimals. (This can be disabled via the table adapter argument as shown in the code example [here](./configuration#pyarrow)) +1. The `SQLAlchemy` dialect converts doubles to decimals. (This can be disabled via the table adapter argument as shown in the code example [here](./configuration#pyarrow)) #### Postgres / MSSQL -No issues were found for these databases. Postgres is the only backend where we observed 2x speedup with `ConnectorX` (see [here](https://github.com/dlt-hub/sql_database_benchmarking/tree/main/postgres) for the benchmarking code). On other db systems it performs the same as (or some times worse than) the `PyArrow` backend. - +No issues were found for these databases. Postgres is the only backend where we observed a 2x speedup with `ConnectorX` (see [here](https://github.com/dlt-hub/sql_database_benchmarking/tree/main/postgres) for the benchmarking code). On other db systems, it performs the same as (or sometimes worse than) the `PyArrow` backend. + ### Notes on specific data types #### JSON -In the `SQLAlchemy` backend JSON data type is represented as a Python object, and in the `PyArrow` backend, it is represented as a JSON string. At present it does not work correctly with `pandas` and `ConnectorX`which cast Python objects to `str`, generating invalid JSON strings that cannot be loaded into destination. +In the `SQLAlchemy` backend, the JSON data type is represented as a Python object, and in the `PyArrow` backend, it is represented as a JSON string. At present, it does not work correctly with `pandas` and `ConnectorX`, which cast Python objects to `str`, generating invalid JSON strings that cannot be loaded into the destination. #### UUID -UUIDs are represented as string by default. You can switch this behavior by using `table_adapter_callback` to modify properties of the UUID type for a particular column. (See the code example [here](./configuration#pyarrow) for how to modify the data type properties of a particular column.) \ No newline at end of file +UUIDs are represented as strings by default. You can switch this behavior by using `table_adapter_callback` to modify properties of the UUID type for a particular column. (See the code example [here](./configuration#pyarrow) for how to modify the data type properties of a particular column.) + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/usage.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/usage.md index bb2f39b007..bdc440630d 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/usage.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/usage.md @@ -12,7 +12,7 @@ import Header from '../_source-info-header.md'; ## Applying column-wise filtering on the data being ingested -By default, the existing source and resource functions, `sql_database` and `sql_table`, ingest all of the records from the source table. But by using `query_adapter_callback`, it is possible to pass a `WHERE` clause inside the underlying `SELECT` statement using the [SQLAlchemy syntax](https://docs.sqlalchemy.org/en/14/core/selectable.html#). Thich enables filtering the data based on specific columns before extract. 
+By default, the existing source and resource functions, `sql_database` and `sql_table`, ingest all of the records from the source table. However, by using `query_adapter_callback`, it is possible to pass a `WHERE` clause inside the underlying `SELECT` statement using the [SQLAlchemy syntax](https://docs.sqlalchemy.org/en/14/core/selectable.html#). This enables filtering the data based on specific columns before extraction. The example below uses `query_adapter_callback` to filter on the column `customer_id` for the table `orders`: @@ -32,11 +32,10 @@ source = sql_database( ``` ## Transforming the data before load -You have direct access to the extracted data through the resource objects (`sql_table()` or `sql_database().with_resource())`), each of which represents a single SQL table. These objects are generators that yield -individual rows of the table which can be modified by using custom python functions. These functions can be applied to the resource using `add_map`. +You have direct access to the extracted data through the resource objects (`sql_table()` or `sql_database().with_resource())`), each of which represents a single SQL table. These objects are generators that yield individual rows of the table, which can be modified by using custom Python functions. These functions can be applied to the resource using `add_map`. :::note -The PyArrow backend does not yield individual rows rather loads chunks of data as `ndarray`. In this case, the transformation function that goes into `add_map` should be configured to expect an `ndarray` input. +The PyArrow backend does not yield individual rows but loads chunks of data as `ndarray`. In this case, the transformation function that goes into `add_map` should be configured to expect an `ndarray` input. ::: @@ -50,7 +49,7 @@ Examples: def pseudonymize_name(doc): ''' - Pseudonmyisation is a deterministic type of PII-obscuring + Pseudonymization is a deterministic type of PII-obscuring. Its role is to allow identifying users by their hash, without revealing the underlying info. ''' @@ -99,10 +98,11 @@ Examples: ## Deploying the sql_database pipeline -You can deploy the `sql_database` pipeline with any of the `dlt` deployment methods, such as [GitHub Actions](../../../walkthroughs/deploy-a-pipeline/deploy-with-github-actions), [Airflow](../../../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer), [Dagster](../../../walkthroughs/deploy-a-pipeline/deploy-with-dagster) etc. See [here](../../../walkthroughs/deploy-a-pipeline) for a full list of deployment methods. +You can deploy the `sql_database` pipeline with any of the `dlt` deployment methods, such as [GitHub Actions](../../../walkthroughs/deploy-a-pipeline/deploy-with-github-actions), [Airflow](../../../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer), [Dagster](../../../walkthroughs/deploy-a-pipeline/deploy-with-dagster), etc. See [here](../../../walkthroughs/deploy-a-pipeline) for a full list of deployment methods. ### Running on Airflow When running on Airflow: -1. Use the `dlt` [Airflow Helper](../../../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md#2-modify-dag-file) to create tasks from the `sql_database` source. (If you want to run table extraction in parallel, then you can do this by setting `decompose = "parallel-isolated"` when doing the source->DAG conversion. See [here](../../../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer#2-modify-dag-file) for code example.) -2. Reflect tables at runtime with `defer_table_reflect` argument. +1. 
Use the `dlt` [Airflow Helper](../../../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer.md#2-modify-dag-file) to create tasks from the `sql_database` source. (If you want to run table extraction in parallel, you can do this by setting `decompose = "parallel-isolated"` when doing the source->DAG conversion. See [here](../../../walkthroughs/deploy-a-pipeline/deploy-with-airflow-composer#2-modify-dag-file) for a code example.) +2. Reflect tables at runtime with the `defer_table_reflect` argument. 3. Set `allow_external_schedulers` to load data using [Airflow intervals](../../../general-usage/incremental-loading.md#using-airflow-schedule-for-backfill-and-incremental-loading). + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md b/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md index a9d70c338c..3718ab7110 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md @@ -17,7 +17,7 @@ you'll ingest to transfer data to your warehouse. This Strapi `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/strapi_pipeline.py) -loads data using “Strapi API” to the destination of your choice. +loads data using the “Strapi API” to the destination of your choice. Sources and resources that can be loaded using this verified source are: @@ -25,7 +25,7 @@ Sources and resources that can be loaded using this verified source are: | ------------- | -------------------------- | | strapi_source | Retrieves data from Strapi | -## Setup Guide +## Setup guide ### Grab API token @@ -86,7 +86,7 @@ For more information, read the guide on [how to add a verified source](../../wal 1. Finally, enter credentials for your chosen destination as per the [docs](../destinations/). -For more information, read the [General Usage: Credentials.](../../general-usage/credentials) +For more information, read the [General usage: Credentials.](../../general-usage/credentials) ## Run the pipeline @@ -126,7 +126,7 @@ For more information, read the guide on [how to run a pipeline](../../walkthroug ### Source `strapi_source` -This function retrives data from Strapi. +This function retrieves data from Strapi. ```py @dlt.source @@ -176,3 +176,4 @@ verified source. > requirements. + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md index 73565f7e94..1d6f59dd3e 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md @@ -10,12 +10,11 @@ import Header from './_source-info-header.md';
[Workable](https://www.workable.com/) is an online platform for posting jobs and managing the hiring process. With Workable, -employers can create job listings, receive applications, track candidates, collaborate with team -members, schedule interviews, and manage the overall hiring workflow. +employers can create job listings, receive applications, track candidates, collaborate with team members, schedule interviews, and manage the overall hiring workflow. This Workable `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/workable_pipeline.py) -loads data using “Workable API” to the destination of your choice. +loads data using the “Workable API” to the destination of your choice. ### Default endpoints @@ -23,32 +22,34 @@ This verified source loads data from the following default endpoints: | Name | Description | | ----------------- | ------------------------------------------------------------------------------------- | -| members | individuals who have access to your Workable account | -| recruiters | individuals who are responsible for managing the hiring and recruitment processes | -| stages | represent the different steps or phases in the hiring process for a job position | -| requisitions | formal request made by an organization to fill a specific job opening or position | -| jobs | individual job postings or job listings created by employers or recruiters | -| custom_attributes | additional fields or data points that you can define and assign to candidates or jobs | -| events | specific occurrences or actions related to the hiring and recruitment process | -| candidates | individuals who have applied for job positions within an organization | +| members | Individuals who have access to your Workable account | +| recruiters | Individuals who are responsible for managing the hiring and recruitment processes | +| stages | Represent the different steps or phases in the hiring process for a job position | +| requisitions | Formal requests made by an organization to fill a specific job opening or position | +| jobs | Individual job postings or job listings created by employers or recruiters | +| custom_attributes | Additional fields or data points that you can define and assign to candidates or jobs | +| events | Specific occurrences or actions related to the hiring and recruitment process | +| candidates | Individuals who have applied for job positions within an organization | ### Dependent endpoints -Besides the main endpoints, for "candidate" and "jobs" endpoints, the following are their dependent endpoints: +Besides the main endpoints, for the "candidate" and "jobs" endpoints, the following are their dependent endpoints: | Name | Dependent endpoints | | --------------------------------- | -------------------------------------------------------------------------------------------------- | -| candidates/:id/activities | retrieve activities or events related to the candidate's interaction with the hiring process. 
| -| candidates/:id/offer | a specific candidate's offer information | -| jobs/:shortcode/activities | activities associated with a particular job posting identified by its shortcode | -| jobs/:shortcode/application_form | application form details for a specified job | -| jobs/:shortcode/questions | retrieve the interview questions associated with a specific job posting | -| jobs/:shortcode/stages | retrieve information about the hiring stages associated with a particular job | -| jobs/:shortcode/custom_attributes | retrieve custom attributes associated with a particular job posting | -| jobs/:shortcode/members | retrieve information about the members associated with a particular job within the Workable system | -| jobs/:shortcode/recruiters | retrieve the list of recruiters associated with a particular job. | - -## Setup Guide +| candidates/:id/activities | Retrieve activities or events related to the candidate's interaction with the hiring process. | +| candidates/:id/offer | A specific candidate's offer information | +| jobs/:shortcode/activities | Activities associated with a particular job posting identified by its shortcode | +| jobs/:shortcode/application_form | Application form details for a specified job | +| jobs/:shortcode/questions | Retrieve the interview questions associated with a specific job posting | +| jobs/:shortcode/stages | Retrieve information about the hiring stages associated with a particular job | +| jobs/:shortcode/custom_attributes | Retrieve custom attributes associated with a particular job posting | +| jobs/:shortcode/members | Retrieve information about the members associated with a particular job within the Workable system | +| jobs/:shortcode/recruiters | Retrieve the list of recruiters associated with a particular job. | + +## Setup guide + + ### Grab API credentials @@ -101,7 +102,7 @@ For more information, read the guide on [how to add a verified source.](../../wa [you copied above](workable.md#grab-api-credentials). This will ensure that your data pipeline example can access your Workable resources securely. -1. Next you need to configure ".dlt/config.toml", which looks like: +1. Next, you need to configure ".dlt/config.toml", which looks like: ```toml [sources.workable] @@ -184,8 +185,8 @@ def workable_source( `start_date`: Optional. Sets a data retrieval start date; defaults to January 1, 2000. -`load_details`: A boolean parameter. Set to true to load dependent endpoints with main ones (”jobs” -& “candidates”). +`load_details`: A boolean parameter. Set to true to load dependent endpoints with main ones ("jobs" +& "candidates"). ### Resource `candidate_resource` @@ -205,14 +206,12 @@ def candidates_resource( 1, 2000 if undefined. ## Customization + ### Create your own pipeline -If you wish to create your own pipelines, you can leverage source and resource methods from this -verified source. +If you wish to create your own pipelines, you can leverage source and resource methods from this verified source. -To create your data pipeline using single loading and -[incremental data loading](../../general-usage/incremental-loading) (only for the -**Candidates** endpoint), follow these steps: +To create your data pipeline using single loading and [incremental data loading](../../general-usage/incremental-loading) (only for the **Candidates** endpoint), follow these steps: 1. 
Configure the pipeline by specifying the pipeline name, destination, and dataset as follows: @@ -232,8 +231,7 @@ To create your data pipeline using single loading and print(load_info) ``` - > Note: In the run, the "candidates" endpoint loads incrementally via 'merge' mode using - > 'updated_by'. All other endpoints load in 'replace' mode. + > Note: In the run, the "candidates" endpoint loads incrementally via 'merge' mode using 'updated_by'. All other endpoints load in 'replace' mode. 1. To load data from a specific date, including dependent endpoints: @@ -257,8 +255,7 @@ To create your data pipeline using single loading and > Note: "candidates" loads incrementally in merge mode, while "members" uses replace mode. -1. To load data from the “jobs” endpoint and its dependent endpoints like "activities" and - "application_form": +1. To load data from the “jobs” endpoint and its dependent endpoints like "activities" and "application_form": ```py load_data = workable_source(start_date=datetime(2022, 2, 1), load_details=True) @@ -268,12 +265,7 @@ To create your data pipeline using single loading and ``` > Note: "load_details" parameter is set to True. -1. To use incremental loading for the candidates endpoint, maintain the same pipeline and - destination dataset names. The pipeline name helps retrieve the - [state](../../general-usage/state) of the last run, essential for incremental - data loading. Changing these names might trigger a - [“dev_mode”](../../general-usage/pipeline#do-experiments-with-dev-mode), - disrupting metadata tracking for - [incremental data loading](../../general-usage/incremental-loading). +1. To use incremental loading for the candidates endpoint, maintain the same pipeline and destination dataset names. The pipeline name helps retrieve the [state](../../general-usage/state) of the last run, essential for incremental data loading. Changing these names might trigger a [“dev_mode”](../../general-usage/pipeline#do-experiments-with-dev-mode), disrupting metadata tracking for [incremental data loading](../../general-usage/incremental-loading). + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md b/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md index cfccf5d675..b34bc83087 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md @@ -15,7 +15,7 @@ analytics, and talks. This Zendesk `dlt` verified source and [pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/zendesk_pipeline.py) -loads data using “Zendesk Support API”, "Zendesk Chat API" and "Zendesk Talk API" to the destination +loads data using the “Zendesk Support API”, "Zendesk Chat API", and "Zendesk Talk API" to the destination of your choice. 
Endpoints that can be loaded using this verified source are: @@ -23,24 +23,24 @@ Endpoints that can be loaded using this verified source are: | Name | Description | | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | SUPPORT_ENDPOINTS | "users", "sla_policies", "groups", "organizations", "brands" | -| SUPPORT_EXTRA_ENDPOINTS | "activities", "automations", "custom_agent_roles", "dynamic_content", "group_memberships",
"job_status","macros", "organization_fields", "organization_memberships", "recipient_addresses",
"requests" , "satisfaction_ratings", "sharing_agreements", "skips", "suspended_tickets","targets",
"ticket_forms", "ticket_metrics", "triggers", "user_fields", "views", "tags" | +| SUPPORT_EXTRA_ENDPOINTS | "activities", "automations", "custom_agent_roles", "dynamic_content", "group memberships",
"job_status", "macros", "organization_fields", "organization memberships", "recipient_addresses",
"requests", "satisfaction_ratings", "sharing_agreements", "skips", "suspended_tickets", "targets",
"ticket_forms", "ticket_metrics", "triggers", "user_fields", "views", "tags" | | TALK_ENDPOINTS | "calls", "addresses", "greeting_categories", "greetings", "ivrs",
"phone_numbers", "settings", "lines", "agents_activity" | | INCREMENTAL_TALK_ENDPOINTS | "calls", "logs" | -> To the get the complete list of endpoints, please refer to +> To get the complete list of endpoints, please refer to > ["zendesk/settings.py".](https://github.com/dlt-hub/verified-sources/blob/master/sources/zendesk/settings.py) -## Setup Guide +## Setup guide ### Grab credentials -You can load data from three types of Zendesk services, that are : +You can load data from three types of Zendesk services, which are: - Zendesk Support - Zendesk Chat - Zendesk Talk -### Zendesk Support +### Zendesk support Zendesk support can be authenticated using one of the following three methods: @@ -52,7 +52,7 @@ The simplest way to authenticate is via subdomain + email address + password, si are already available and you don't have to generate any tokens. Alternatively, you can also use API tokens or OAuth tokens. -#### Grab Subdomain +#### Grab subdomain 1. Log into Zendesk to find your subdomain in the URL. E.g., for https://www.dlthub.zendesk.com, the subdomain is "dlthub". @@ -60,7 +60,7 @@ tokens or OAuth tokens. #### Grab Zendesk support API token 1. In Zendesk (top right), select Admin Center. -1. Choose "Apps and Integrations. +1. Choose "Apps and Integrations". 1. Navigate to APIs and select Zendesk API. Activate “Password access” & “Token access”. 1. Click “Add API token”, add a description, and note down the API token. 1. The token displays just once; note it safely. @@ -78,8 +78,8 @@ Here's a summarized version: 1. Alternatively, fetch client ID via OAuth [using this](https://developer.zendesk.com/documentation/ticketing/working-with-oauth/creating-and-using-oauth-tokens-with-the-api/#getting-an-oauth-clients-id). -1. To get full token using the client id obtained above, you can follow the [instructions - here.](https://developer.zendesk.com/documentation/ticketing/working-with-oauth/creating-and-using-oauth-tokens-with-the-api/#creating-the-access-token) +1. To get the full token using the client ID obtained above, you can follow the [instructions + here](https://developer.zendesk.com/documentation/ticketing/working-with-oauth/creating-and-using-oauth-tokens-with-the-api/#creating-the-access-token). ```sh curl https://{subdomain}.zendesk.com/api/v2/oauth/tokens.json \ @@ -98,10 +98,10 @@ Here's a summarized version: > We've set the scope as 'read', but you can customize the scope as needed. -1. In response to the above request you'll get a full token which can be used to configure Zendesk +1. In response to the above request, you'll get a full token which can be used to configure Zendesk support. -### Zendesk Chat +### Zendesk chat Zendesk chat can be authenticated using this method: @@ -116,13 +116,13 @@ subdomain is "dlthub". #### Grab Zendesk chat OAuth token -To generate Zendesk chat OAuth token, please refer to this +To generate a Zendesk chat OAuth token, please refer to this [documentation](https://support.zendesk.com/hc/en-us/articles/4408828740762-Chat-API-tutorial-Generating-an-OAuth-token-integrated-Chat-accounts-#:~:text=Create%20the%20OAuth%20API%20client,-First%20of%20all&text=Go%20to%20Zendesk%20Chat%20%3E%20Account,Client%20to%20finish%20the%20setup) . Below is a summary of the steps: 1. Access Zendesk Chat directly or through the top right "Chat" option in Zendesk product. 1. Navigate to "Settings" > "Account" > "API" > "Add API client". -1. Fill in client name, company, and redirect URLs (default: http://localhost:8080). +1. 
Fill in the client name, company, and redirect URLs (default: http://localhost:8080). 1. Record the "CLIENT_ID" and "SUBDOMAIN". 1. Format the below URL with your own CLIENT_ID and SUBDOMAIN, paste it into a new browser tab, and press Enter. @@ -136,12 +136,12 @@ To generate Zendesk chat OAuth token, please refer to this returned in the browser's URL field then it worked! ![Zendesk Chat](docs_images/Zendesk_chat_access_token.jpg) 1. Safely store the OAuth token to authenticate Zendesk Chat for retrieving data. -1. There are several other methods to obtain Zendesk chat token as given in the full +1. There are several other methods to obtain a Zendesk chat token as given in the full [documentation here.](https://support.zendesk.com/hc/en-us/articles/4408828740762-Chat-API-tutorial-Generating-an-OAuth-token-integrated-Chat-accounts-#:~:text=Create%20the%20OAuth%20API%20client,-First%20of%20all&text=Go%20to%20Zendesk%20Chat%20%3E%20Account,Client%20to%20finish%20the%20setup.) -### Zendesk Talk +### Zendesk talk -Zendesk talk fetches the data using Zendesk Tolk API. +Zendesk Talk fetches the data using the Zendesk Talk API. 1. Obtaining credentials for Zendesk Talk mirrors the process for [Zendesk support](#zendesk-support). @@ -197,7 +197,7 @@ For more information, read the guide on [how to add a verified source.](../../wa - Method 2 ([subdomain](#subdomain) + email address + [API token](#grab-zendesk-support-api-token)) - Method 3 ([subdomain](#subdomain) + [OAuth token](#zendesk-support-oauth-token)) - To load data from Zendesk Chat use the following method for authentication: + To load data from Zendesk Chat, use the following method for authentication: - Method 1 ([subdomain](#subdomain) + [OAuth token](#grab-zendesk-chat-oauth-token)) > Note: Use the Zendesk Support OAuth token for configuring Zendesk Support, and for @@ -262,7 +262,7 @@ run. ### Resource `talk_resource` -This function loads data from Zendesk talk endpoint. +This function loads data from the Zendesk Talk endpoint. ```py def talk_resource( @@ -276,14 +276,13 @@ def talk_resource( `zendesk_client`: An instance of ZendeskAPIClient for making API calls to Zendesk Talk. -`talk_endpoint_name`: The name of the talk_endpoint. +`talk_endpoint_name`: The name of the talk endpoint. `talk_endpoint`: The actual URL ending of the endpoint. -`pagination`: Type of pagination type used by endpoint. +`pagination_type`: Type of pagination used by the endpoint. - -Other functions similar to the source `zendesk_talk` and resources similar to `talk_endpoint` are: +Other functions similar to the source `zendesk_talk` and resources similar to `talk_endpoint` are: | Function Name | Type | Description | |---------------------------| --------- |---------------------------------------------------------------------------------------------------| @@ -292,11 +291,12 @@ Other functions similar to the source `zendesk_talk` and resources similar to `t | talk_incremental_resource | resource | Retrieves data incrementally from a Zendesk Talk endpoint. | | zendesk_support | source | Retrieves data from Zendesk Support for tickets, users, brands, organizations, and groups | | ticket_events | resource | Retrieves records of all changes made to a ticket, including state, etc. 
| -| tickets | resource | Retrieves the data for ticket table, the table can be pivoted and cols renamed | +| tickets | resource | Retrieves the data for the ticket table, which can be pivoted and columns renamed | | ticket_metric_events | resource | Retrieves ticket metric events from the start date, defaulting to January 1st of the current year | -| basic_resource | resource | Retrives basic loader for Zenpy endpoints with pagination support | +| basic_resource | resource | Retrieves basic loader for Zenpy endpoints with pagination support | ## Customization + ### Create your own pipeline If you wish to create your own pipelines, you can leverage source and resource methods from this @@ -312,27 +312,27 @@ verified source. ) ``` -1. To load data related to support, talk and chat: +1. To load data related to support, talk, and chat: ```py - #zendesk support source function + # Zendesk support source function data_support = zendesk_support(load_all=True) - # zendesk chat source function + # Zendesk chat source function data_chat = zendesk_chat() - # zendesk talk source function + # Zendesk talk source function data_talk = zendesk_talk() - # run pipeline with all 3 sources - info = pipeline.run([data_support,data_chat,data_talk]) + # Run pipeline with all 3 sources + info = pipeline.run([data_support, data_chat, data_talk]) print(info) ``` -1. To load data related to support, chat and talk in incremental mode: +1. To load data related to support, chat, and talk in incremental mode: ```py pipeline = dlt.pipeline( pipeline_name="dlt_zendesk_pipeline", # Use a custom name if desired destination="duckdb", # Choose the appropriate destination (e.g., duckdb, redshift, post) - dev_mode = False, + dev_mode=False, dataset_name="sample_zendesk_data" # Use a custom name if desired ) data = zendesk_support(load_all=True, start_date=start_date) @@ -342,7 +342,7 @@ verified source. print(info) ``` - > Supports incremental loading for Support, Chat, and Talk Endpoints. By default, it fetches data + > Supports incremental loading for Support, Chat, and Talk endpoints. By default, it fetches data > from the last load time in the dlt state or from 1st Jan 2000 if no prior load. This approach > ensures data retrieval since the specified date, while still updating the last load time. @@ -350,13 +350,13 @@ verified source. for new tickets. ```py - # Load ranges of dates to load between January 1st 2023 and today + # Load ranges of dates between January 1st, 2023, and today min_start_date = pendulum.DateTime(year=2023, month=1, day=1).in_timezone("UTC") max_end_date = pendulum.today() # Generate tuples of date ranges, each with 1 week in between. ranges = make_date_ranges(min_start_date, max_end_date, timedelta(weeks=1)) - # Run the pipeline in a loop for each 1 week range + # Run the pipeline in a loop for each 1-week range for start, end in ranges: print(f"Loading tickets between {start} and {end}") data = zendesk_support(start_date=start, end_date=end).with_resources("tickets") @@ -374,3 +374,4 @@ verified source. > data. This approach can be used with all incremental Zendesk sources. 
+ From 73ea048e672da74c2b1376d744a5a2cb201a83e8 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Tue, 1 Oct 2024 19:50:45 +0200 Subject: [PATCH 11/29] Docs: sort core sources in the sidebar by usage (#1898) --- docs/website/sidebars.js | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index 32bb554842..d63684d3fc 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -64,19 +64,6 @@ const sidebars = { id: 'dlt-ecosystem/verified-sources/index', }, items: [ - { - type: 'category', - label: 'Filesystem & cloud storage', - description: 'AWS S3, Google Cloud Storage, Azure, SFTP, local file system', - link: { - type: 'doc', - id: 'dlt-ecosystem/verified-sources/filesystem/index', - }, - items: [ - 'dlt-ecosystem/verified-sources/filesystem/basic', - 'dlt-ecosystem/verified-sources/filesystem/advanced', - ] - }, { type: 'category', label: 'REST APIs', @@ -118,6 +105,19 @@ const sidebars = { 'dlt-ecosystem/verified-sources/sql_database/advanced', ] }, + { + type: 'category', + label: 'Filesystem & cloud storage', + description: 'AWS S3, Google Cloud Storage, Azure, SFTP, local file system', + link: { + type: 'doc', + id: 'dlt-ecosystem/verified-sources/filesystem/index', + }, + items: [ + 'dlt-ecosystem/verified-sources/filesystem/basic', + 'dlt-ecosystem/verified-sources/filesystem/advanced', + ] + }, 'dlt-ecosystem/verified-sources/airtable', 'dlt-ecosystem/verified-sources/amazon_kinesis', 'dlt-ecosystem/verified-sources/arrow-pandas', From 7f75a0f3fe5ac6f792f3e63d90190eccd12a8bba Mon Sep 17 00:00:00 2001 From: dat-a-man <98139823+dat-a-man@users.noreply.github.com> Date: Wed, 2 Oct 2024 00:40:07 +0530 Subject: [PATCH 12/29] Added troubleshooting section to filesystem docs (#1900) Co-authored-by: Anton Burnashev --- .../dlt-ecosystem/destinations/filesystem.md | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md index 2be382c326..3e562dfb84 100644 --- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md +++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md @@ -700,6 +700,31 @@ This destination fully supports [dlt state sync](../../general-usage/state#synci You will also notice `init` files being present in the root folder and the special `dlt` folders. In the absence of the concepts of schemas and tables in blob storages and directories, `dlt` uses these special files to harmonize the behavior of the `filesystem` destination with the other implemented destinations. -**Note:** When a load generates a new state, for example when using incremental loads, a new state file appears in the `_dlt_pipeline_state` folder at the destination. To prevent data accumulation, state cleanup mechanisms automatically remove old state files, retaining only the latest 100 by default. This cleanup process can be customized or disabled using the filesystem configuration `max_state_files`, which determines the maximum number of pipeline state files to retain (default is 100). Setting this value to 0 or a negative number disables the cleanup of old states. +:::note +When a load generates a new state, for example when using incremental loads, a new state file appears in the `_dlt_pipeline_state` folder at the destination. 
To prevent data accumulation, state cleanup mechanisms automatically remove old state files, retaining only the latest 100 by default. This cleanup process can be customized or disabled using the filesystem configuration `max_state_files`, which determines the maximum number of pipeline state files to retain (default is 100). Setting this value to 0 or a negative number disables the cleanup of old states. +::: + +## Troubleshooting +### File Name Too Long Error +When running your pipeline, you might encounter an error like `[Errno 36] File name too long Error`. This error occurs because the generated file name exceeds the maximum allowed length on your filesystem. + +To prevent the file name length error, set the `max_identifier_length` parameter for your destination. This truncates all identifiers (including filenames) to a specified maximum length. +For example: + +```py +from dlt.destinations import duckdb + +pipeline = dlt.pipeline( + pipeline_name="your_pipeline_name", + destination=duckdb( + max_identifier_length=200, # Adjust the length as needed + ), +) +``` + +:::note +- `max_identifier_length` truncates all identifiers (tables, columns). Ensure the length maintains uniqueness to avoid collisions. +- Adjust `max_identifier_length` based on your data structure and filesystem limits. +::: From 90fc2aa8d02dfabea8a8c816dbae3d99842b6e0c Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Tue, 1 Oct 2024 21:35:50 +0200 Subject: [PATCH 13/29] Fix a typo in credentials/advanced.md (#1912) --- docs/website/docs/general-usage/credentials/advanced.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/website/docs/general-usage/credentials/advanced.md b/docs/website/docs/general-usage/credentials/advanced.md index c25030a154..ad1adaa8f2 100644 --- a/docs/website/docs/general-usage/credentials/advanced.md +++ b/docs/website/docs/general-usage/credentials/advanced.md @@ -142,8 +142,8 @@ data_source = google_sheets( data_source.run(destination="bigquery") ``` -`dlt.config` and `dlt.secrets` behave like dictionaries from which you can request a value with any key name. `dlt` will look in all [config providers](setup) - env variables, TOML files, etc. to create these dictionaries. You can also use `dlt.config.get()` or `dlt.secrets.get()` to -request a value cast to a desired type. For example: +`dlt.config` and `dlt.secrets` behave like dictionaries from which you can request a value with any key name. `dlt` will look in all [config providers](setup) - environment variables, TOML files, etc. to create these dictionaries. You can also use `dlt.config.get()` or `dlt.secrets.get()` to +request a value and cast it to a desired type. 
For example: ```py credentials = dlt.secrets.get("my_section.gcp_credentials", GcpServiceAccountCredentials) From a76a06d9a8032e4bcb64739ce0384b35dbab0977 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Tue, 1 Oct 2024 21:36:49 +0200 Subject: [PATCH 14/29] Remove code markup in credentials/advanced.md (#1911) --- docs/website/docs/general-usage/credentials/advanced.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/website/docs/general-usage/credentials/advanced.md b/docs/website/docs/general-usage/credentials/advanced.md index ad1adaa8f2..f4b2ad0d11 100644 --- a/docs/website/docs/general-usage/credentials/advanced.md +++ b/docs/website/docs/general-usage/credentials/advanced.md @@ -8,7 +8,7 @@ keywords: [credentials, secrets.toml, secrets, config, configuration, environmen ## Injection mechanism -`dlt` has a special treatment for functions decorated with `@dlt.source`, `@dlt.resource`, and `@dlt.destination`. When such a function is called, `dlt` takes the argument names in the signature and supplies (`injects`) the required values by looking for them in [various config providers](setup). +`dlt` has a special treatment for functions decorated with `@dlt.source`, `@dlt.resource`, and `@dlt.destination`. When such a function is called, `dlt` takes the argument names in the signature and supplies (injects) the required values by looking for them in [various config providers](setup). ### Injection rules From c312fb411fa34bbb0f8ed140141dcc1af8ecb71e Mon Sep 17 00:00:00 2001 From: David Scharf Date: Wed, 2 Oct 2024 12:13:59 +0200 Subject: [PATCH 15/29] docs: grammar fix pages 100 - 120 (#1908) * grammar fix docs pages 100 to 120 * Apply suggestions from code review Co-authored-by: Alena Astrakhantseva * Update docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md Co-authored-by: Alena Astrakhantseva * Update docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md Co-authored-by: Alena Astrakhantseva --------- Co-authored-by: Alena Astrakhantseva Co-authored-by: Anton Burnashev --- .../verified-sources/filesystem/advanced.md | 35 ++-- .../verified-sources/filesystem/basic.md | 63 ++++---- .../verified-sources/filesystem/index.md | 5 +- .../verified-sources/rest_api/advanced.md | 11 +- .../verified-sources/rest_api/basic.md | 60 +++---- .../verified-sources/rest_api/index.md | 5 +- .../verified-sources/sql_database/advanced.md | 56 +++---- .../sql_database/configuration.md | 64 ++++---- .../visualizations/exploring-the-data.md | 59 +++---- docs/website/docs/examples/index.md | 7 +- .../docs/reference/command-line-interface.md | 105 ++++++------ .../reference/frequently-asked-questions.md | 10 +- docs/website/docs/reference/installation.md | 11 +- docs/website/docs/reference/performance.md | 134 +++++++-------- docs/website/docs/reference/telemetry.md | 45 ++---- docs/website/docs/reference/tracing.md | 4 +- .../docs/running-in-production/alerting.md | 12 +- .../docs/running-in-production/monitoring.md | 21 +-- .../docs/running-in-production/running.md | 153 ++++++++---------- .../docs/running-in-production/tracing.md | 15 +- 20 files changed, 418 insertions(+), 457 deletions(-) diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/advanced.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/advanced.md index be08e9ff44..e1eeca0ee9 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/advanced.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/advanced.md @@ -32,10 +32,10 @@ The 
filesystem ensures consistent file representation across bucket types and of #### `FileItem` fields -- `file_url` - complete URL of the file (e.g. `s3://bucket-name/path/file`). This field serves as a primary key. +- `file_url` - complete URL of the file (e.g., `s3://bucket-name/path/file`). This field serves as a primary key. - `file_name` - name of the file from the bucket URL. - `relative_path` - set when doing `glob`, is a relative path to a `bucket_url` argument. -- `mime_type` - file's mime type. It is sourced from the bucket provider or inferred from its extension. +- `mime_type` - file's MIME type. It is sourced from the bucket provider or inferred from its extension. - `modification_date` - file's last modification time (format: `pendulum.DateTime`). - `size_in_bytes` - file size. - `file_content` - content, provided upon request. @@ -90,7 +90,7 @@ example_xls = filesystem( bucket_url=BUCKET_URL, file_glob="../directory/example.xlsx" ) | read_excel("example_table") # Pass the data through the transformer to read the "example_table" sheet. -pipeline = dlt.pipeline(pipeline_name="my_pipeline", destination="duckdb", dataset_name="example_xls_data",) +pipeline = dlt.pipeline(pipeline_name="my_pipeline", destination="duckdb", dataset_name="example_xls_data") # Execute the pipeline and load the extracted data into the "duckdb" destination. load_info = pipeline.run(example_xls.with_name("example_xls_data")) # Print the loading information. @@ -119,7 +119,7 @@ def read_xml(items: Iterator[FileItemDict]) -> Iterator[TDataItems]: for file_obj in items: # Open the file object. with file_obj.open() as file: - # Parse the file to dict records + # Parse the file to dict records. yield xmltodict.parse(file.read()) # Set up the pipeline to fetch a specific XML file from a filesystem (bucket). @@ -143,14 +143,14 @@ You can get an fsspec client from the filesystem resource after it was extracted from dlt.sources.filesystem import filesystem, read_csv from dlt.sources.filesystem.helpers import fsspec_from_resource -# get filesystem source +# Get filesystem source. gs_resource = filesystem("gs://ci-test-bucket/") -# extract files +# Extract files. pipeline = dlt.pipeline(pipeline_name="my_pipeline", destination="duckdb") pipeline.run(gs_resource | read_csv()) -# get fs client +# Get fs client. fs_client = fsspec_from_resource(gs_resource) -# do any operation +# Do any operation. 
fs_client.ls("ci-test-bucket/standard_source/samples") ``` @@ -166,31 +166,32 @@ from dlt.common.storages.fsspec_filesystem import FileItemDict from dlt.sources.filesystem import filesystem def _copy(item: FileItemDict) -> FileItemDict: - # instantiate fsspec and copy file + # Instantiate fsspec and copy file dest_file = os.path.join(local_folder, item["file_name"]) - # create dest folder + # Create destination folder os.makedirs(os.path.dirname(dest_file), exist_ok=True) - # download file + # Download file item.fsspec.download(item["file_url"], dest_file) - # return file item unchanged + # Return file item unchanged return item BUCKET_URL = "gs://ci-test-bucket/" -# use recursive glob pattern and add file copy step +# Use recursive glob pattern and add file copy step downloader = filesystem(BUCKET_URL, file_glob="**").add_map(_copy) -# NOTE: you do not need to load any data to execute extract, below we obtain +# NOTE: You do not need to load any data to execute extract; below, we obtain # a list of files in a bucket and also copy them locally listing = list(downloader) print(listing) -# download to table "listing" +# Download to table "listing" pipeline = dlt.pipeline(pipeline_name="my_pipeline", destination="duckdb") load_info = pipeline.run( downloader.with_name("listing"), write_disposition="replace" ) -# pretty print the information on data that was loaded +# Pretty print the information on data that was loaded print(load_info) print(listing) print(pipeline.last_trace.last_normalize_info) -``` \ No newline at end of file +``` + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md index 6eb02b4edf..5ae7de82da 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/basic.md @@ -10,7 +10,7 @@ Filesystem source allows loading files from remote locations (AWS S3, Google Clo To load unstructured data (`.pdf`, `.txt`, e-mail), please refer to the [unstructured data source](https://github.com/dlt-hub/verified-sources/tree/master/sources/unstructured_data). -## How Filesystem source works? +## How filesystem source works The Filesystem source doesn't just give you an easy way to load data from both remote and local files — it also comes with a powerful set of tools that let you customize the loading process to fit your specific needs. @@ -54,7 +54,7 @@ To get started with your data pipeline, follow these steps: dlt init filesystem duckdb ``` - [dlt init command](../../../reference/command-line-interface) will initialize + The [dlt init command](../../../reference/command-line-interface) will initialize [the pipeline example](https://github.com/dlt-hub/verified-sources/blob/master/sources/filesystem_pipeline.py) with the filesystem as the source and [duckdb](../../destinations/duckdb.md) as the destination. @@ -66,6 +66,8 @@ To get started with your data pipeline, follow these steps: ## Configuration + + ### Get credentials @@ -145,7 +147,7 @@ You don't need any credentials for the local filesystem. To provide credentials to the filesystem source, you can use [any method available](../../../general-usage/credentials/setup#available-config-providers) in `dlt`. One of the easiest ways is to use configuration files. The `.dlt` folder in your working directory -contains two files: `config.toml` and `secrets.toml`. Sensitive information, like passwords and +contains two files: `config.toml` and `secrets.toml`. 
Sensitive information, like passwords and access tokens, should only be put into `secrets.toml`, while any other configuration, like the path to a bucket, can be specified in `config.toml`. @@ -212,7 +214,7 @@ bucket_url="gs:////" Learn how to set up SFTP credentials for each authentication method in the [SFTP section](../../destinations/filesystem#sftp). -For example, in case of key-based authentication, you can configure the source the following way: +For example, in the case of key-based authentication, you can configure the source the following way: ```toml # secrets.toml @@ -229,7 +231,7 @@ bucket_url = "sftp://[hostname]/[path]" -You can use both native local filesystem paths and `file://` URI. Absolute, relative, and UNC Windows paths are supported. +You can use both native local filesystem paths and the `file://` URI. Absolute, relative, and UNC Windows paths are supported. You could provide an absolute filepath: @@ -239,7 +241,7 @@ You could provide an absolute filepath: bucket_url='file://Users/admin/Documents/csv_files' ``` -Or skip the schema and provide the local path in a format native for your operating system. For example, for Windows: +Or skip the schema and provide the local path in a format native to your operating system. For example, for Windows: ```toml [sources.filesystem] @@ -250,7 +252,7 @@ bucket_url='~\Documents\csv_files\' -You can also specify the credentials using Environment variables. The name of the corresponding environment +You can also specify the credentials using environment variables. The name of the corresponding environment variable should be slightly different from the corresponding name in the `toml` file. Simply replace dots `.` with double underscores `__`: @@ -260,7 +262,7 @@ export SOURCES__FILESYSTEM__AWS_SECRET_ACCESS_KEY = "Please set me up!" ``` :::tip -`dlt` supports more ways of authorizing with the cloud storage, including identity-based +`dlt` supports more ways of authorizing with cloud storage, including identity-based and default credentials. To learn more about adding credentials to your pipeline, please refer to the [Configuration and secrets section](../../../general-usage/credentials/complex_types#gcp-credentials). ::: @@ -310,7 +312,7 @@ or taken from the config: Full list of `filesystem` resource parameters: * `bucket_url` - full URL of the bucket (could be a relative path in the case of the local filesystem). -* `credentials` - cloud storage credentials of `AbstractFilesystem` instance (should be empty for the local filesystem). We recommend not to specify this parameter in the code, but put it in secrets file instead. +* `credentials` - cloud storage credentials of `AbstractFilesystem` instance (should be empty for the local filesystem). We recommend not specifying this parameter in the code, but putting it in a secrets file instead. * `file_glob` - file filter in glob format. Defaults to listing all non-recursive files in the bucket URL. * `files_per_page` - number of files processed at once. The default value is `100`. * `extract_content` - if true, the content of the file will be read and returned in the resource. The default value is `False`. @@ -332,15 +334,15 @@ filesystem_pipe = filesystem( #### Available transformers -- `read_csv()` - process `csv` files using `pandas` -- `read_jsonl()` - process `jsonl` files chuck by chunk -- `read_parquet()` - process `parquet` files using `pyarrow` -- `read_csv_duckdb()` - this transformer process `csv` files using DuckDB, which usually shows better performance, than `pandas`. 
+- `read_csv()` - processes `csv` files using `pandas` +- `read_jsonl()` - processes `jsonl` files chunk by chunk +- `read_parquet()` - processes `parquet` files using `pyarrow` +- `read_csv_duckdb()` - this transformer processes `csv` files using DuckDB, which usually shows better performance than `pandas`. :::tip We advise that you give each resource a [specific name](../../../general-usage/resource#duplicate-and-rename-resources) -before loading with `pipeline.run`. This will make sure that data goes to a table with the name you +before loading with `pipeline.run`. This will ensure that data goes to a table with the name you want and that each pipeline uses a [separate state for incremental loading.](../../../general-usage/state#read-and-write-pipeline-state-in-a-resource) ::: @@ -366,7 +368,7 @@ import dlt from dlt.sources.filesystem import filesystem, read_csv filesystem_pipe = filesystem(bucket_url="file://Users/admin/Documents/csv_files", file_glob="*.csv") | read_csv() -# tell dlt to merge on date +# Tell dlt to merge on date filesystem_pipe.apply_hints(write_disposition="merge", merge_key="date") # We load the data into the table_name table @@ -380,19 +382,19 @@ print(load_info) Here are a few simple ways to load your data incrementally: 1. [Load files based on modification date](#load-files-based-on-modification-date). Only load files that have been updated since the last time `dlt` processed them. `dlt` checks the files' metadata (like the modification date) and skips those that haven't changed. -2. [Load new records based on a specific column](#load-new-records-based-on-a-specific-column). You can load only the new or updated records by looking at a specific column, like `updated_at`. Unlike the first method, this approach would read all files every time and then filter the records which was updated. -3. [Combine loading only updated files and records](#combine-loading-only-updated-files-and-records). Finally, you can combine both methods. It could be useful if new records could be added to existing files, so you not only want to filter the modified files, but modified records as well. +2. [Load new records based on a specific column](#load-new-records-based-on-a-specific-column). You can load only the new or updated records by looking at a specific column, like `updated_at`. Unlike the first method, this approach would read all files every time and then filter the records which were updated. +3. [Combine loading only updated files and records](#combine-loading-only-updated-files-and-records). Finally, you can combine both methods. It could be useful if new records could be added to existing files, so you not only want to filter the modified files, but also the modified records. #### Load files based on modification date -For example, to load only new CSV files with [incremental loading](../../../general-usage/incremental-loading) you can use `apply_hints` method. +For example, to load only new CSV files with [incremental loading](../../../general-usage/incremental-loading), you can use the `apply_hints` method. 
```py import dlt from dlt.sources.filesystem import filesystem, read_csv -# This configuration will only consider new csv files +# This configuration will only consider new CSV files new_files = filesystem(bucket_url="s3://bucket_name", file_glob="directory/*.csv") -# add incremental on modification time +# Add incremental on modification time new_files.apply_hints(incremental=dlt.sources.incremental("modification_date")) pipeline = dlt.pipeline(pipeline_name="my_pipeline", destination="duckdb") @@ -402,13 +404,13 @@ print(load_info) #### Load new records based on a specific column -In this example we load only new records based on the field called `updated_at`. This method may be useful if you are not able to -filter files by modification date because for example, all files are modified each time new record is appeared. +In this example, we load only new records based on the field called `updated_at`. This method may be useful if you are not able to +filter files by modification date because, for example, all files are modified each time a new record appears. ```py import dlt from dlt.sources.filesystem import filesystem, read_csv -# We consider all csv files +# We consider all CSV files all_files = filesystem(bucket_url="s3://bucket_name", file_glob="directory/*.csv") # But filter out only updated records @@ -425,11 +427,11 @@ print(load_info) import dlt from dlt.sources.filesystem import filesystem, read_csv -# This configuration will only consider modified csv files +# This configuration will only consider modified CSV files new_files = filesystem(bucket_url="s3://bucket_name", file_glob="directory/*.csv") new_files.apply_hints(incremental=dlt.sources.incremental("modification_date")) -# And in each modified file we filter out only updated records +# And in each modified file, we filter out only updated records filesystem_pipe = (new_files | read_csv()) filesystem_pipe.apply_hints(incremental=dlt.sources.incremental("updated_at")) pipeline = dlt.pipeline(pipeline_name="my_pipeline", destination="duckdb") @@ -459,7 +461,7 @@ print(load_info) ``` :::tip -You could also use `file_glob` to filter files by names. It works very well in simple cases, for example, filtering by extention: +You could also use `file_glob` to filter files by names. It works very well in simple cases, for example, filtering by extension: ```py from dlt.sources.filesystem import filesystem @@ -493,8 +495,8 @@ print(load_info) Windows supports paths up to 255 characters. When you access a path longer than 255 characters, you'll see a `FileNotFound` exception. - To go over this limit, you can use [extended paths](https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry). - **Note that Python glob does not work with extended UNC paths**, so you will not be able to use them +To go over this limit, you can use [extended paths](https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=registry). +**Note that Python glob does not work with extended UNC paths**, so you will not be able to use them ```toml [sources.filesystem] @@ -514,4 +516,5 @@ function to configure the resource correctly. Use `**` to include recursive file filesystem supports full Python [glob](https://docs.python.org/3/library/glob.html#glob.glob) functionality, while cloud storage supports a restricted `fsspec` [version](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.glob). 
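As an illustration of the recursive glob mentioned above, here is a minimal sketch that combines `**` with one of the built-in transformers. The bucket and folder names are placeholders, not values from this guide:

```py
import dlt
from dlt.sources.filesystem import filesystem, read_parquet

# `**` makes the glob recursive; the bucket and folder names below are hypothetical
recursive_files = filesystem(
    bucket_url="s3://bucket_name/archive",
    file_glob="**/*.parquet",
) | read_parquet()

pipeline = dlt.pipeline(pipeline_name="my_pipeline", destination="duckdb")
load_info = pipeline.run(recursive_files.with_name("archive_files"))
print(load_info)
```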
- \ No newline at end of file + + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md index 1441931340..0aaa07b0c3 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem/index.md @@ -12,8 +12,9 @@ The Filesystem source allows seamless loading of files from the following locati * remote filesystem (via SFTP) * local filesystem -The Filesystem source natively supports `csv`, `parquet`, and `jsonl` files and allows customization for loading any type of structured files. +The Filesystem source natively supports `csv`, `parquet`, and `jsonl` files and allows customization for loading any type of structured file. import DocCardList from '@theme/DocCardList'; - \ No newline at end of file + + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/advanced.md b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/advanced.md index 27d2cc0b6e..26add81def 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/advanced.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/advanced.md @@ -9,15 +9,15 @@ keywords: [rest api, restful api] - `config`: The REST API configuration dictionary. - `name`: An optional name for the source. - `section`: An optional section name in the configuration file. -- `max_table_nesting`: Sets the maximum depth of nested table above which the remaining nodes are loaded as structs or JSON. -- `root_key` (bool): Enables merging on all resources by propagating root foreign key to nested tables. This option is most useful if you plan to change write disposition of a resource to disable/enable merge. Defaults to False. +- `max_table_nesting`: Sets the maximum depth of nested tables above which the remaining nodes are loaded as structs or JSON. +- `root_key` (bool): Enables merging on all resources by propagating the root foreign key to nested tables. This option is most useful if you plan to change the write disposition of a resource to disable/enable merge. Defaults to False. - `schema_contract`: Schema contract settings that will be applied to this resource. - `spec`: A specification of configuration and secret values required by the source. ### Response actions The `response_actions` field in the endpoint configuration allows you to specify how to handle specific responses or all responses from the API. For example, responses with specific status codes or content substrings can be ignored. -Additionally, all responses or only responses with specific status codes or content substrings can be transformed with a custom callable, such as a function. This callable is passed on to the requests library as a [response hook](https://requests.readthedocs.io/en/latest/user/advanced/#event-hooks). The callable can modify the response object and has to return it for the modifications to take effect. +Additionally, all responses or only responses with specific status codes or content substrings can be transformed with a custom callable, such as a function. This callable is passed on to the requests library as a [response hook](https://requests.readthedocs.io/en/latest/user/advanced/#event-hooks). The callable can modify the response object and must return it for the modifications to take effect. :::caution Experimental Feature This is an experimental feature and may change in future releases. 
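For instance, ignoring responses that signal a missing or empty resource could be sketched as follows; the endpoint path and the matching rules are illustrative, not taken from a specific API:

```py
endpoint_config = {
    "path": "issues",
    "response_actions": [
        # Skip responses by status code or by a substring in the response body
        {"status_code": 404, "action": "ignore"},
        {"content": "Not found", "action": "ignore"},
    ],
}
```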
@@ -55,7 +55,7 @@ from requests.models import Response from dlt.common import json def set_encoding(response, *args, **kwargs): - # sets the encoding in case it's not correctly detected + # Sets the encoding in case it's not correctly detected response.encoding = 'windows-1252' return response @@ -99,7 +99,7 @@ In this example, the resource will set the correct encoding for all responses fi ```py def set_encoding(response, *args, **kwargs): - # sets the encoding in case it's not correctly detected + # Sets the encoding in case it's not correctly detected response.encoding = 'windows-1252' return response @@ -122,3 +122,4 @@ source_config = { ``` In this example, the resource will set the correct encoding for all responses. More callables can be added to the list of response_actions. + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md index 121769a11a..03214950f4 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/basic.md @@ -62,7 +62,7 @@ pipeline = dlt.pipeline( load_info = pipeline.run(source) ``` -Running this pipeline will create two tables in the DuckDB: `posts` and `comments` with the data from the respective API endpoints. The `comments` resource will fetch comments for each post by using the `id` field from the `posts` resource. +Running this pipeline will create two tables in DuckDB: `posts` and `comments` with the data from the respective API endpoints. The `comments` resource will fetch comments for each post by using the `id` field from the `posts` resource. ## Setup @@ -132,9 +132,11 @@ github_token = "your_github_token" ## Source configuration + + ### Quick example -Let's take a look at the GitHub example in `rest_api_pipeline.py` file: +Let's take a look at the GitHub example in the `rest_api_pipeline.py` file: ```py from dlt.sources.rest_api import RESTAPIConfig, rest_api_resources @@ -206,14 +208,14 @@ def load_github() -> None: The declarative resource configuration is defined in the `config` dictionary. It contains the following key components: -1. `client`: Defines the base URL and authentication method for the API. In this case it uses token-based authentication. The token is stored in the `secrets.toml` file. +1. `client`: Defines the base URL and authentication method for the API. In this case, it uses token-based authentication. The token is stored in the `secrets.toml` file. 2. `resource_defaults`: Contains default settings for all [resources](#resource-configuration). In this example, we define that all resources: - Have `id` as the [primary key](../../../general-usage/resource#define-schema) - Use the `merge` [write disposition](../../../general-usage/incremental-loading#choosing-a-write-disposition) to merge the data with the existing data in the destination. - - Send a `per_page` query parameter with each request to 100 to get more results per page. + - Send a `per_page=100` query parameter with each request to get more results per page. -3. `resources`: A list of [resources](#resource-configuration) to be loaded. Here, we have two resources: `issues` and `issue_comments`, which correspond to the GitHub API endpoints for [repository issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues) and [issue comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments). 
Note that we need a in issue number to fetch comments for each issue. This number is taken from the `issues` resource. More on this in the [resource relationships](#define-resource-relationships) section. +3. `resources`: A list of [resources](#resource-configuration) to be loaded. Here, we have two resources: `issues` and `issue_comments`, which correspond to the GitHub API endpoints for [repository issues](https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28#list-repository-issues) and [issue comments](https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments). Note that we need an issue number to fetch comments for each issue. This number is taken from the `issues` resource. More on this in the [resource relationships](#define-resource-relationships) section. Let's break down the configuration in more detail. @@ -227,7 +229,6 @@ from dlt.sources.rest_api import RESTAPIConfig ``` ::: - The configuration object passed to the REST API Generic Source has three main elements: ```py @@ -297,7 +298,7 @@ Both `resource1` and `resource2` will have the `per_page` parameter set to 100. This is a list of resource configurations that define the API endpoints to be loaded. Each resource configuration can be: - a dictionary with the [resource configuration](#resource-configuration). -- a string. In this case, the string is used as the both as the endpoint path and the resource name, and the resource configuration is taken from the `resource_defaults` configuration if it exists. +- a string. In this case, the string is used as both the endpoint path and the resource name, and the resource configuration is taken from the `resource_defaults` configuration if it exists. ### Resource configuration @@ -337,7 +338,7 @@ The endpoint configuration defines how to query the API endpoint. Quick example: The fields in the endpoint configuration are: - `path`: The path to the API endpoint. -- `method`: The HTTP method to be used. Default is `GET`. +- `method`: The HTTP method to be used. The default is `GET`. - `params`: Query parameters to be sent with each request. For example, `sort` to order the results or `since` to specify [incremental loading](#incremental-loading). This is also used to define [resource relationships](#define-resource-relationships). - `json`: The JSON payload to be sent with the request (for POST and PUT requests). - `paginator`: Pagination configuration for the endpoint. See the [pagination](#pagination) section for more details. @@ -398,7 +399,7 @@ from dlt.sources.helpers.rest_client.paginators import JSONLinkPaginator ``` :::note -Currently pagination is supported only for GET requests. To handle POST requests with pagination, you need to implement a [custom paginator](../../../general-usage/http/rest-client.md#custom-paginator). +Currently, pagination is supported only for GET requests. To handle POST requests with pagination, you need to implement a [custom paginator](../../../general-usage/http/rest-client.md#custom-paginator). ::: These are the available paginators: @@ -407,9 +408,9 @@ These are the available paginators: | ------------ | -------------- | ----------- | | `json_link` | [JSONLinkPaginator](../../../general-usage/http/rest-client.md#jsonresponsepaginator) | The link to the next page is in the body (JSON) of the response.
*Parameters:*
  • `next_url_path` (str) - the JSONPath to the next page URL
| | `header_link` | [HeaderLinkPaginator](../../../general-usage/http/rest-client.md#headerlinkpaginator) | The links to the next page are in the response headers.
*Parameters:*
  • `links_next_key` (str) - the name of the header containing the links. Default is "next".
| -| `offset` | [OffsetPaginator](../../../general-usage/http/rest-client.md#offsetpaginator) | The pagination is based on an offset parameter. With total items count either in the response body or explicitly provided.
*Parameters:*
  • `limit` (int) - the maximum number of items to retrieve in each request
  • `offset` (int) - the initial offset for the first request. Defaults to `0`
  • `offset_param` (str) - the name of the query parameter used to specify the offset. Defaults to "offset"
  • `limit_param` (str) - the name of the query parameter used to specify the limit. Defaults to "limit"
  • `total_path` (str) - a JSONPath expression for the total number of items. If not provided, pagination is controlled by `maximum_offset` and `stop_after_empty_page`
  • `maximum_offset` (int) - optional maximum offset value. Limits pagination even without total count
  • `stop_after_empty_page` (bool) - Whether pagination should stop when a page contains no result items. Defaults to `True`
| -| `page_number` | [PageNumberPaginator](../../../general-usage/http/rest-client.md#pagenumberpaginator) | The pagination is based on a page number parameter. With total pages count either in the response body or explicitly provided.
*Parameters:*
  • `base_page` (int) - the starting page number. Defaults to `0`
  • `page_param` (str) - the query parameter name for the page number. Defaults to "page"
  • `total_path` (str) - a JSONPath expression for the total number of pages. If not provided, pagination is controlled by `maximum_page` and `stop_after_empty_page`
  • `maximum_page` (int) - optional maximum page number. Stops pagination once this page is reached
  • `stop_after_empty_page` (bool) - Whether pagination should stop when a page contains no result items. Defaults to `True`
| -| `cursor` | [JSONResponseCursorPaginator](../../../general-usage/http/rest-client.md#jsonresponsecursorpaginator) | The pagination is based on a cursor parameter. The value of the cursor is in the response body (JSON).
*Parameters:*
  • `cursor_path` (str) - the JSONPath to the cursor value. Defaults to "cursors.next"
  • `cursor_param` (str) - the query parameter name for the cursor. Defaults to "after"
| +| `offset` | [OffsetPaginator](../../../general-usage/http/rest-client.md#offsetpaginator) | The pagination is based on an offset parameter, with the total items count either in the response body or explicitly provided.
*Parameters:*
  • `limit` (int) - the maximum number of items to retrieve in each request
  • `offset` (int) - the initial offset for the first request. Defaults to `0`
  • `offset_param` (str) - the name of the query parameter used to specify the offset. Defaults to "offset"
  • `limit_param` (str) - the name of the query parameter used to specify the limit. Defaults to "limit"
  • `total_path` (str) - a JSONPath expression for the total number of items. If not provided, pagination is controlled by `maximum_offset` and `stop_after_empty_page`
  • `maximum_offset` (int) - optional maximum offset value. Limits pagination even without total count
  • `stop_after_empty_page` (bool) - Whether pagination should stop when a page contains no result items. Defaults to `True`
| +| `page_number` | [PageNumberPaginator](../../../general-usage/http/rest-client.md#pagenumberpaginator) | The pagination is based on a page number parameter, with the total pages count either in the response body or explicitly provided.
*Parameters:*
  • `base_page` (int) - the starting page number. Defaults to `0`
  • `page_param` (str) - the query parameter name for the page number. Defaults to "page"
  • `total_path` (str) - a JSONPath expression for the total number of pages. If not provided, pagination is controlled by `maximum_page` and `stop_after_empty_page`
  • `maximum_page` (int) - optional maximum page number. Stops pagination once this page is reached
  • `stop_after_empty_page` (bool) - Whether pagination should stop when a page contains no result items. Defaults to `True`
| +| `cursor` | [JSONResponseCursorPaginator](../../../general-usage/http/rest-client.md#jsonresponsecursorpaginator) | The pagination is based on a cursor parameter, with the value of the cursor in the response body (JSON).
*Parameters:*
  • `cursor_path` (str) - the JSONPath to the cursor value. Defaults to "cursors.next"
  • `cursor_param` (str) - the query parameter name for the cursor. Defaults to "after"
| | `single_page` | SinglePagePaginator | The response will be interpreted as a single-page response, ignoring possible pagination metadata. | | `auto` | `None` | Explicitly specify that the source should automatically detect the pagination method. | @@ -431,7 +432,7 @@ rest_api.config_setup.register_paginator("custom_paginator", CustomPaginator) ### Data selection -The `data_selector` field in the endpoint configuration allows you to specify a JSONPath to select the data from the response. By default, the source will try to detect locations of the data automatically. +The `data_selector` field in the endpoint configuration allows you to specify a JSONPath to select the data from the response. By default, the source will try to detect the locations of the data automatically. Use this field when you need to specify the location of the data in the response explicitly. @@ -481,7 +482,6 @@ You can use the following endpoint configuration: Read more about [JSONPath syntax](https://github.com/h2non/jsonpath-ng?tab=readme-ov-file#jsonpath-syntax) to learn how to write selectors. - ### Authentication For APIs that require authentication to access their endpoints, the REST API source supports various authentication methods, including token-based authentication, query parameters, basic authentication, and custom authentication. The authentication configuration is specified in the `auth` field of the [client](#client) either as a dictionary or as an instance of the [authentication class](../../../general-usage/http/rest-client.md#authentication). @@ -510,7 +510,7 @@ Available authentication types: | Authentication class | String Alias (`type`) | Description | | ------------------- | ----------- | ----------- | -| [BearTokenAuth](../../../general-usage/http/rest-client.md#bearer-token-authentication) | `bearer` | Bearer token authentication. | +| [BearerTokenAuth](../../../general-usage/http/rest-client.md#bearer-token-authentication) | `bearer` | Bearer token authentication. | | [HTTPBasicAuth](../../../general-usage/http/rest-client.md#http-basic-authentication) | `http_basic` | Basic HTTP authentication. | | [APIKeyAuth](../../../general-usage/http/rest-client.md#api-key-authentication) | `api_key` | API key authentication with key defined in the query parameters or in the headers. | | [OAuth2ClientCredentials](../../../general-usage/http/rest-client.md#oauth20-authorization) | N/A | OAuth 2.0 authorization with a temporary access token obtained from the authorization server. | @@ -537,7 +537,7 @@ from dlt.sources.helpers.rest_client.auth import BearerTokenAuth config = { "client": { - "auth": BearTokenAuth(dlt.secrets["your_api_token"]), + "auth": BearerTokenAuth(dlt.secrets["your_api_token"]), }, # ... } @@ -551,7 +551,7 @@ Available authentication types: | `type` | Authentication class | Description | | ----------- | ------------------- | ----------- | -| `bearer` | [BearTokenAuth](../../../general-usage/http/rest-client.md#bearer-token-authentication) | Bearer token authentication.
Parameters:
  • `token` (str)
| +| `bearer` | [BearerTokenAuth](../../../general-usage/http/rest-client.md#bearer-token-authentication) | Bearer token authentication.
Parameters:
  • `token` (str)
| | `http_basic` | [HTTPBasicAuth](../../../general-usage/http/rest-client.md#http-basic-authentication) | Basic HTTP authentication.
Parameters:
  • `username` (str)
  • `password` (str)
| | `api_key` | [APIKeyAuth](../../../general-usage/http/rest-client.md#api-key-authentication) | API key authentication with key defined in the query parameters or in the headers.
Parameters:
  • `name` (str) - the name of the query parameter or header
  • `api_key` (str) - the API key value
  • `location` (str, optional) - the location of the API key in the request. Can be `query` or `header`. Default is `header`
| @@ -572,10 +572,9 @@ rest_api.config_setup.register_auth("custom_auth", CustomAuth) } ``` - ### Define resource relationships -When you have a resource that depends on another resource, you can define the relationship using the `resolve` configuration. With it you link a path parameter in the child resource to a field in the parent resource's data. +When you have a resource that depends on another resource, you can define the relationship using the `resolve` configuration. With it, you link a path parameter in the child resource to a field in the parent resource's data. In the GitHub example, the `issue_comments` resource depends on the `issues` resource. The `issue_number` parameter in the `issue_comments` endpoint configuration is resolved from the `number` field of the `issues` resource: @@ -653,7 +652,7 @@ You can include data from the parent resource in the child resource by using the } ``` -This will include the `id`, `title`, and `created_at` fields from the `issues` resource in the `issue_comments` resource data. The name of the included fields will be prefixed with the parent resource name and an underscore (`_`) like so: `_issues_id`, `_issues_title`, `_issues_created_at`. +This will include the `id`, `title`, and `created_at` fields from the `issues` resource in the `issue_comments` resource data. The names of the included fields will be prefixed with the parent resource name and an underscore (`_`) like so: `_issues_id`, `_issues_title`, `_issues_created_at`. ### Define a resource which is not a REST endpoint @@ -661,7 +660,7 @@ Sometimes, we want to request endpoints with specific values that are not return Thus, you can also include arbitrary dlt resources in your `RESTAPIConfig` instead of defining a resource for every path! In the following example, we want to load the issues belonging to three repositories. -Instead of defining now three different issues resources, one for each of the paths `dlt-hub/dlt/issues/`, `dlt-hub/verified-sources/issues/`, `dlt-hub/dlthub-education/issues/`, we have a resource `repositories` which yields a list of repository names which will be fetched by the dependent resource `issues`. +Instead of defining three different issues resources, one for each of the paths `dlt-hub/dlt/issues/`, `dlt-hub/verified-sources/issues/`, `dlt-hub/dlthub-education/issues/`, we have a resource `repositories` which yields a list of repository names that will be fetched by the dependent resource `issues`. ```py from dlt.sources.rest_api import RESTAPIConfig @@ -830,7 +829,7 @@ For example, if we query the endpoint with `https://api.example.com/posts?create } ``` -To enable the incremental loading for this endpoint, you can use the following endpoint configuration: +To enable incremental loading for this endpoint, you can use the following endpoint configuration: ```py { @@ -851,7 +850,7 @@ So in our case, the next request will be made to `https://api.example.com/posts? Let's break down the configuration. -1. We explicitly set `data_selector` to `"results"` to select the list of posts from the response. This is optional, if not set, dlt will try to auto-detect the data location. +1. We explicitly set `data_selector` to `"results"` to select the list of posts from the response. This is optional; if not set, dlt will try to auto-detect the data location. 2. We define the `created_since` parameter as an incremental parameter with the following fields: ```py @@ -865,7 +864,7 @@ Let's break down the configuration. ``` - `type`: The type of the parameter definition. 
In this case, it must be set to `incremental`. -- `cursor_path`: The JSONPath to the field within each item in the list. The value of this field will be used in the next request. In the example above our items look like `{"id": 1, "title": "Post 1", "created_at": "2024-01-26"}` so to track the created time we set `cursor_path` to `"created_at"`. Note that the JSONPath starts from the root of the item (dict) and not from the root of the response. +- `cursor_path`: The JSONPath to the field within each item in the list. The value of this field will be used in the next request. In the example above, our items look like `{"id": 1, "title": "Post 1", "created_at": "2024-01-26"}` so to track the created time, we set `cursor_path` to `"created_at"`. Note that the JSONPath starts from the root of the item (dict) and not from the root of the response. - `initial_value`: The initial value for the cursor. This is the value that will initialize the state of incremental loading. In this case, it's `2024-01-25`. The value type should match the type of the field in the data item. ### Incremental loading using the `incremental` field @@ -906,7 +905,7 @@ The full available configuration for the `incremental` field is: The fields are: - `start_param` (str): The name of the query parameter to be used as the start condition. If we use the example above, it would be `"created_since"`. -- `end_param` (str): The name of the query parameter to be used as the end condition. This is optional and can be omitted if you only need to track the start condition. This is useful when you need to fetch data within a specific range and the API supports end conditions (like `created_before` query parameter). +- `end_param` (str): The name of the query parameter to be used as the end condition. This is optional and can be omitted if you only need to track the start condition. This is useful when you need to fetch data within a specific range and the API supports end conditions (like the `created_before` query parameter). - `cursor_path` (str): The JSONPath to the field within each item in the list. This is the field that will be used to track the incremental loading. In the example above, it's `"created_at"`. - `initial_value` (str): The initial value for the cursor. This is the value that will initialize the state of incremental loading. - `end_value` (str): The end value for the cursor to stop the incremental loading. This is optional and can be omitted if you only need to track the start condition. If you set this field, `initial_value` needs to be set as well. @@ -920,7 +919,7 @@ If you encounter issues with incremental loading, see the [troubleshooting secti If you need to transform the values in the cursor field before passing them to the API endpoint, you can specify a callable under the key `convert`. For example, the API might return UNIX epoch timestamps but expects to be queried with an ISO 8601 date. To achieve that, we can specify a function that converts from the date format returned by the API to the date format required for API requests. -In the following examples, `1704067200` is returned from the API in the field `updated_at` but the API will be called with `?created_since=2024-01-01`. +In the following examples, `1704067200` is returned from the API in the field `updated_at`, but the API will be called with `?created_since=2024-01-01`. Incremental loading using the `params` field: ```py @@ -963,7 +962,7 @@ This also provides details on the HTTP requests. 
#### Getting validation errors -When you running the pipeline and getting a `DictValidationException`, it means that the [source configuration](#source-configuration) is incorrect. The error message provides details on the issue including the path to the field and the expected type. +When you are running the pipeline and getting a `DictValidationException`, it means that the [source configuration](#source-configuration) is incorrect. The error message provides details on the issue, including the path to the field and the expected type. For example, if you have a source configuration like this: @@ -1015,7 +1014,7 @@ If incorrect data is received from an endpoint, check the `data_selector` field #### Getting insufficient data or incorrect pagination -Check the `paginator` field in the configuration. When not explicitly specified, the source tries to auto-detect the pagination method. If auto-detection fails, or the system is unsure, a warning is logged. For production environments, we recommend to specify an explicit paginator in the configuration. See the [pagination](#pagination) section for more details. Some APIs may have non-standard pagination methods, and you may need to implement a [custom paginator](../../../general-usage/http/rest-client.md#implementing-a-custom-paginator). +Check the `paginator` field in the configuration. When not explicitly specified, the source tries to auto-detect the pagination method. If auto-detection fails, or the system is unsure, a warning is logged. For production environments, we recommend specifying an explicit paginator in the configuration. See the [pagination](#pagination) section for more details. Some APIs may have non-standard pagination methods, and you may need to implement a [custom paginator](../../../general-usage/http/rest-client.md#implementing-a-custom-paginator). #### Incremental loading not working @@ -1023,11 +1022,11 @@ See the [troubleshooting guide](../../../general-usage/incremental-loading.md#tr #### Getting HTTP 404 errors -Some API may return 404 errors for resources that do not exist or have no data. Manage these responses by configuring the `ignore` action in [response actions](./advanced#response-actions). +Some APIs may return 404 errors for resources that do not exist or have no data. Manage these responses by configuring the `ignore` action in [response actions](./advanced#response-actions). ### Authentication issues -If experiencing 401 (Unauthorized) errors, this could indicate: +If you are experiencing 401 (Unauthorized) errors, this could indicate: - Incorrect authorization credentials. Verify credentials in the `secrets.toml`. Refer to [Secret and configs](../../../general-usage/credentials/setup#understanding-the-exceptions) for more information. - An incorrect authentication type. Consult the API documentation for the proper method. See the [authentication](#authentication) section for details. For some APIs, a [custom authentication method](../../../general-usage/http/rest-client.md#custom-authentication) may be required. @@ -1037,3 +1036,4 @@ If experiencing 401 (Unauthorized) errors, this could indicate: The `rest_api` source uses the [RESTClient](../../../general-usage/http/rest-client.md) class for HTTP requests. Refer to the RESTClient [troubleshooting guide](../../../general-usage/http/rest-client.md#troubleshooting) for debugging tips. For further assistance, join our [Slack community](https://dlthub.com/community). We're here to help! 
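Tying the pagination advice above to the configuration format, a minimal sketch of pinning the paginator explicitly (the base URL and the JSONPath are illustrative) might look like this:

```py
config = {
    "client": {
        "base_url": "https://api.example.com",
        # Explicit paginator instead of auto-detection; parameters follow the paginator table above
        "paginator": {
            "type": "page_number",
            "base_page": 1,
            "total_path": "info.pages",
        },
    },
    "resources": ["posts"],
}
```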
+ diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/index.md b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/index.md index dd9a77e297..f92d38f87e 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/index.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/rest_api/index.md @@ -11,8 +11,9 @@ You can use the REST API source to extract data from any REST API. Using a [decl * how to handle [pagination](./basic.md#pagination), * [authentication](./basic.md#authentication). -dlt will take care of the rest: unnesting the data, inferring the schema etc, and writing to the destination. +dlt will take care of the rest: unnesting the data, inferring the schema, etc., and writing to the destination. import DocCardList from '@theme/DocCardList'; - \ No newline at end of file + + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md index 708b195456..74012b4311 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/advanced.md @@ -6,30 +6,28 @@ keywords: [sql connector, sql database pipeline, sql database] import Header from '../_source-info-header.md'; -# Advanced Usage +# Advanced usage
-## Incremental Loading +## Incremental loading Efficient data management often requires loading only new or updated data from your SQL databases, rather than reprocessing the entire dataset. This is where incremental loading comes into play. Incremental loading uses a cursor column (e.g., timestamp or auto-incrementing ID) to load only data newer than a specified initial value, enhancing efficiency by reducing processing time and resource use. Read [here](../../../walkthroughs/sql-incremental-configuration) for more details on incremental loading with `dlt`. - #### How to configure -1. **Choose a Cursor Column**: Identify a column in your SQL table that can serve as a reliable indicator of new or updated rows. Common choices include timestamp columns or auto-incrementing IDs. -1. **Set an Initial Value**: Choose a starting value for the cursor to begin loading data. This could be a specific timestamp or ID from which you wish to start loading data. +1. **Choose a cursor column**: Identify a column in your SQL table that can serve as a reliable indicator of new or updated rows. Common choices include timestamp columns or auto-incrementing IDs. +1. **Set an initial value**: Choose a starting value for the cursor to begin loading data. This could be a specific timestamp or ID from which you wish to start loading data. 1. **Deduplication**: When using incremental loading, the system automatically handles the deduplication of rows based on the primary key (if available) or row hash for tables without a primary key. -1. **Set end_value for backfill**: Set `end_value` if you want to backfill data from -certain range. -1. **Order returned rows**. Set `row_order` to `asc` or `desc` to order returned rows. +1. **Set end_value for backfill**: Set `end_value` if you want to backfill data from a certain range. +1. **Order returned rows**: Set `row_order` to `asc` or `desc` to order returned rows. #### Examples 1. **Incremental loading with the resource `sql_table`**. - Consider a table "family" with a timestamp column `last_modified` that indicates when a row was last modified. To ensure that only rows modified after midnight (00:00:00) on January 1, 2024, are loaded, you would set `last_modified` timestamp as the cursor as follows: + Consider a table "family" with a timestamp column `last_modified` that indicates when a row was last modified. To ensure that only rows modified after midnight (00:00:00) on January 1, 2024, are loaded, you would set the `last_modified` timestamp as the cursor as follows: ```py import dlt @@ -62,10 +60,10 @@ certain range. from dlt.sources.sql_database import sql_database source = sql_database().with_resources("family") - #using the "last_modified" field as an incremental field using initial value of midnight January 1, 2024 + # Using the "last_modified" field as an incremental field using initial value of midnight January 1, 2024 source.family.apply_hints(incremental=dlt.sources.incremental("updated", initial_value=pendulum.DateTime(2022, 1, 1, 0, 0, 0))) - #running the pipeline + # Running the pipeline pipeline = dlt.pipeline(destination="duckdb") info = pipeline.run(source, write_disposition="merge") print(info) @@ -87,31 +85,31 @@ table = sql_table().parallelize() ``` ## Column reflection -Column reflection is the automatic detection and retrieval of column metadata like column names, constraints, data types etc. Columns and their data types are reflected with SQLAlchemy. The SQL types are then mapped to `dlt` types. 
+Column reflection is the automatic detection and retrieval of column metadata like column names, constraints, data types, etc. Columns and their data types are reflected with SQLAlchemy. The SQL types are then mapped to `dlt` types. Depending on the selected backend, some of the types might require additional processing. The `reflection_level` argument controls how much information is reflected: - `reflection_level = "minimal"`: Only column names and nullability are detected. Data types are inferred from the data. -- `reflection_level = "full"`: Column names, nullability, and data types are detected. For decimal types we always add precision and scale. **This is the default.** +- `reflection_level = "full"`: Column names, nullability, and data types are detected. For decimal types, we always add precision and scale. **This is the default.** - `reflection_level = "full_with_precision"`: Column names, nullability, data types, and precision/scale are detected, also for types like text and binary. Integer sizes are set to bigint and to int for all other types. -If the SQL type is unknown or not supported by `dlt`, then, in the pyarrow backend, the column will be skipped, whereas in the other backends the type will be inferred directly from the data irrespective of the `reflection_level` specified. In the latter case, this often means that some types are coerced to strings and `dataclass` based values from sqlalchemy are inferred as `json` (JSON in most destinations). +If the SQL type is unknown or not supported by `dlt`, then, in the pyarrow backend, the column will be skipped, whereas in the other backends the type will be inferred directly from the data irrespective of the `reflection_level` specified. In the latter case, this often means that some types are coerced to strings and `dataclass` based values from sqlalchemy are inferred as `json` (JSON in most destinations). :::tip -If you use reflection level **full** / **full_with_precision** you may encounter a situation where the data returned by sqlalchemy or pyarrow backend does not match the reflected data types. Most common symptoms are: -1. The destination complains that it cannot cast one type to another for a certain column. For example `connector-x` returns TIME in nanoseconds +If you use reflection level **full** / **full_with_precision**, you may encounter a situation where the data returned by sqlalchemy or pyarrow backend does not match the reflected data types. The most common symptoms are: +1. The destination complains that it cannot cast one type to another for a certain column. For example, `connector-x` returns TIME in nanoseconds and BigQuery sees it as bigint and fails to load. -2. You get `SchemaCorruptedException` or other coercion error during the `normalize` step. -In that case you may try **minimal** reflection level where all data types are inferred from the returned data. From our experience this prevents +2. You get `SchemaCorruptedException` or another coercion error during the `normalize` step. +In that case, you may try **minimal** reflection level where all data types are inferred from the returned data. From our experience, this prevents most of the coercion problems. ::: -You can also override the sql type by passing a `type_adapter_callback` function. This function takes a `SQLAlchemy` data type as input and returns a new type (or `None` to force the column to be inferred from the data) as output. +You can also override the SQL type by passing a `type_adapter_callback` function. 
This function takes a `SQLAlchemy` data type as input and returns a new type (or `None` to force the column to be inferred from the data) as output. This is useful, for example, when: -- You're loading a data type which is not supported by the destination (e.g. you need JSON type columns to be coerced to string) -- You're using a sqlalchemy dialect which uses custom types that don't inherit from standard sqlalchemy types. -- For certain types you prefer `dlt` to infer data type from the data and you return `None` +- You're loading a data type that is not supported by the destination (e.g., you need JSON type columns to be coerced to string). +- You're using a sqlalchemy dialect that uses custom types that don't inherit from standard sqlalchemy types. +- For certain types, you prefer `dlt` to infer the data type from the data and you return `None`. In the following example, when loading timestamps from Snowflake, you ensure that they get translated into standard sqlalchemy `timestamp` columns in the resultant schema: @@ -136,10 +134,11 @@ source = sql_database( dlt.pipeline("demo").run(source) ``` -## Configuring with toml/environment variables +## Configuring with TOML/environment variables + You can set most of the arguments of `sql_database()` and `sql_table()` directly in the `.toml` files and/or as environment variables. `dlt` automatically injects these values into the pipeline script. -This is particularly useful with `sql_table()` because you can maintain a separate configuration for each table (below we show **secrets.toml** and **config.toml**, you are free to combine them into one): +This is particularly useful with `sql_table()` because you can maintain a separate configuration for each table (below we show **secrets.toml** and **config.toml**; you are free to combine them into one): The examples below show how you can set arguments in any of the `.toml` files (`secrets.toml` or `config.toml`): 1. Specifying connection string: @@ -147,7 +146,7 @@ The examples below show how you can set arguments in any of the `.toml` files (` [sources.sql_database] credentials="mssql+pyodbc://loader.database.windows.net/dlt_data?trusted_connection=yes&driver=ODBC+Driver+17+for+SQL+Server" ``` -2. Setting parameters like backend, chunk_size, and incremental column for the table `chat_message`: +2. Setting parameters like backend, `chunk_size`, and incremental column for the table `chat_message`: ```toml [sources.sql_database.chat_message] backend="pandas" @@ -156,7 +155,7 @@ The examples below show how you can set arguments in any of the `.toml` files (` [sources.sql_database.chat_message.incremental] cursor_path="updated_at" ``` - This is especially useful with `sql_table()` in a situation where you may want to run this resource for multiple tables. Setting parameters like this would then give you a clean way of maintaing separate configurations for each table. + This is especially useful with `sql_table()` in a situation where you may want to run this resource for multiple tables. Setting parameters like this would then give you a clean way of maintaining separate configurations for each table. 3. Handling separate configurations for database and individual tables When using the `sql_database()` source, you can separately configure the parameters for the database and for the individual tables. 
@@ -171,13 +170,13 @@ The examples below show how you can set arguments in any of the `.toml` files (` cursor_path="updated_at" ``` - The resulting source created below will extract data using **pandas** backend with **chunk_size** 1000. The table **chat_message** will load data incrementally using **updated_at** column. All the other tables will not use incremental loading, and will instead load the full data. + The resulting source created below will extract data using the **pandas** backend with **chunk_size** 1000. The table **chat_message** will load data incrementally using the **updated_at** column. All the other tables will not use incremental loading and will instead load the full data. ```py database = sql_database() ``` -You'll be able to configure all the arguments this way (except adapter callback function). [Standard dlt rules apply]((/general-usage/credentials/setup). +You'll be able to configure all the arguments this way (except the adapter callback function). [Standard dlt rules apply](../../../general-usage/credentials/setup). It is also possible to set these arguments as environment variables [using the proper naming convention](../../../general-usage/credentials/setup#naming-convention): ```sh @@ -186,3 +185,4 @@ SOURCES__SQL_DATABASE__BACKEND=pandas SOURCES__SQL_DATABASE__CHUNK_SIZE=1000 SOURCES__SQL_DATABASE__CHAT_MESSAGE__INCREMENTAL__CURSOR_PATH=updated_at ``` + diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md index 6de2a02b31..4236d656eb 100644 --- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md +++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database/configuration.md @@ -10,11 +10,11 @@ import Header from '../_source-info-header.md';
-## Configuring the SQL Database source +## Configuring the SQL database source -`dlt` sources are python scripts made up of source and resource functions that can be easily customized. The SQL Database verified source has the following built-in source and resource: -1. `sql_database`: a `dlt` source which can be used to load multiple tables and views from a SQL database -2. `sql_table`: a `dlt` resource that loads a single table from the SQL database +`dlt` sources are Python scripts made up of source and resource functions that can be easily customized. The SQL Database verified source has the following built-in source and resource: +1. `sql_database`: a `dlt` source that can be used to load multiple tables and views from a SQL database. +2. `sql_table`: a `dlt` resource that loads a single table from the SQL database. Read more about sources and resources here: [General usage: source](../../../general-usage/source.md) and [General usage: resource](../../../general-usage/resource.md). @@ -106,13 +106,13 @@ We intend our sources to be fully hackable. Feel free to change the source code ### Connection string format `sql_database` uses SQLAlchemy to create database connections and reflect table schemas. You can pass credentials using -[database urls](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls), which has the general format: +[database URLs](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls), which have the general format: ```py "dialect+database_type://username:password@server:port/database_name" ``` -For example, to connect to a MySQL database using the `pymysql` dialect you can use the following connection string: +For example, to connect to a MySQL database using the `pymysql` dialect, you can use the following connection string: ```py "mysql+pymysql://rfamro:PWD@mysql-rfam-public.ebi.ac.uk:4497/Rfam" ``` @@ -123,17 +123,16 @@ Database-specific drivers can be passed into the connection string using query p "mssql+pyodbc://username:password@server/database?driver=ODBC+Driver+17+for+SQL+Server" ``` - ### Passing connection credentials to the `dlt` pipeline There are several options for adding your connection credentials into your `dlt` pipeline: -#### 1. Setting them in `secrets.toml` or as environment variables (Recommended) - -You can set up credentials using [any method](../../../general-usage/credentials/setup#available-config-providers) supported by `dlt`. We recommend using `.dlt/secrets.toml` or the environment variables. See Step 2 of the [setup](./setup) for how to set credentials inside `secrets.toml`. For more information on passing credentials read [here](../../../general-usage/credentials/setup). +#### 1. Setting them in `secrets.toml` or as environment variables (recommended) +You can set up credentials using [any method](../../../general-usage/credentials/setup#available-config-providers) supported by `dlt`. We recommend using `.dlt/secrets.toml` or the environment variables. See Step 2 of the [setup](./setup) for how to set credentials inside `secrets.toml`. For more information on passing credentials, read [here](../../../general-usage/credentials/setup). #### 2. Passing them directly in the script + It is also possible to explicitly pass credentials inside the source. 
Example:
```py
@@ -152,8 +151,11 @@ It is recommended to configure credentials in `.dlt/secrets.toml` and to not inc
:::

### Other connection options
+
#### Using SqlAlchemy Engine as credentials
+
You are able to pass an instance of SqlAlchemy Engine instead of credentials:
+
```py
from dlt.sources.sql_database import sql_table
from sqlalchemy import create_engine

engine = create_engine("mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam")
table = sql_table(engine, table="chat_message", schema="data")
```
-This engine is used by `dlt` to open database connections and can work across multiple threads so is compatible with `parallelize` setting of dlt sources and resources.
+This engine is used by `dlt` to open database connections and can work across multiple threads, so it is compatible with the `parallelize` setting of dlt sources and resources.

## Configuring the backend

-Table backends convert streams of rows from database tables into batches in various formats. The default backend `SQLAlchemy` follows standard `dlt` behavior of
-extracting and normalizing Python dictionaries. We recommend this for smaller tables, initial development work, and when minimal dependencies or a pure Python environment is required. This backend is also the slowest. Other backends make use of the structured data format of the tables and provide significant improvement in speeds. For example, the `PyArrow` backend converts rows into `Arrow` tables, which results in
-good performance and preserves exact data types. We recommend using this backend for larger tables.
+Table backends convert streams of rows from database tables into batches in various formats. The default backend, `SQLAlchemy`, follows standard `dlt` behavior of extracting and normalizing Python dictionaries. We recommend this for smaller tables, initial development work, and when minimal dependencies or a pure Python environment is required. This backend is also the slowest. Other backends make use of the structured data format of the tables and provide significant improvement in speeds. For example, the `PyArrow` backend converts rows into `Arrow` tables, which results in good performance and preserves exact data types. We recommend using this backend for larger tables.

### SQLAlchemy

-The `SQLAlchemy` backend (the default) yields table data as a list of Python dictionaries. This data goes through the regular extract
-and normalize steps and does not require additional dependencies to be installed. It is the most robust (works with any destination, correctly represents data types) but also the slowest. You can set `reflection_level="full_with_precision"` to pass exact data types to `dlt` schema.
+The `SQLAlchemy` backend (the default) yields table data as a list of Python dictionaries. This data goes through the regular extract and normalize steps and does not require additional dependencies to be installed. It is the most robust (works with any destination, correctly represents data types) but also the slowest. You can set `reflection_level="full_with_precision"` to pass exact data types to the `dlt` schema.

### PyArrow

-The `PyArrow` backend yields data as `Arrow` tables. It uses `SQLAlchemy` to read rows in batches but then immediately converts them into `ndarray`, transposes it, and sets it as columns in an `Arrow` table. This backend always fully
-reflects the database table and preserves original types (i.e.
**decimal** / **numeric** data will be extracted without loss of precision). If the destination loads parquet files, this backend will skip `dlt` normalizer and you can gain two orders of magnitude (20x - 30x) speed increase. +The `PyArrow` backend yields data as `Arrow` tables. It uses `SQLAlchemy` to read rows in batches but then immediately converts them into `ndarray`, transposes it, and sets it as columns in an `Arrow` table. This backend always fully reflects the database table and preserves original types (i.e., **decimal** / **numeric** data will be extracted without loss of precision). If the destination loads parquet files, this backend will skip the `dlt` normalizer, and you can gain two orders of magnitude (20x - 30x) speed increase. Note that if `pandas` is installed, we'll use it to convert `SQLAlchemy` tuples into `ndarray` as it seems to be 20-30% faster than using `numpy` directly. @@ -207,21 +205,20 @@ info = pipeline.run(sql_alchemy_source) print(info) ``` -### pandas +### Pandas The `pandas` backend yields data as DataFrames using the `pandas.io.sql` module. `dlt` uses `PyArrow` dtypes by default as they generate more stable typing. With the default settings, several data types will be coerced to dtypes in the yielded data frame: -* **decimal** is mapped to double so it is possible to lose precision +* **decimal** is mapped to double, so it is possible to lose precision * **date** and **time** are mapped to strings * all types are nullable :::note -`dlt` will still use the data types reflected from the source database when creating destination tables. How the type differences resulting from the `pandas` backend are reconciled / parsed is up to the destination. Most of the destinations will be able to parse date/time strings and convert doubles into decimals (Please note that you'll still lose precision on decimals with default settings.). **However we strongly suggest -not to use the** `pandas` **backend if your source tables contain date, time, or decimal columns** +`dlt` will still use the data types reflected from the source database when creating destination tables. How the type differences resulting from the `pandas` backend are reconciled/parsed is up to the destination. Most of the destinations will be able to parse date/time strings and convert doubles into decimals (Please note that you'll still lose precision on decimals with default settings.). **However, we strongly suggest not to use the** `pandas` **backend if your source tables contain date, time, or decimal columns.** ::: -Internally dlt uses `pandas.io.sql._wrap_result` to generate `pandas` frames. To adjust [pandas-specific settings,](https://pandas.pydata.org/docs/reference/api/pandas.read_sql_table.html) pass it in the `backend_kwargs` parameter. For example, below we set `coerce_float` to `False`: +Internally, `dlt` uses `pandas.io.sql._wrap_result` to generate `pandas` frames. To adjust [pandas-specific settings,](https://pandas.pydata.org/docs/reference/api/pandas.read_sql_table.html) pass it in the `backend_kwargs` parameter. For example, below we set `coerce_float` to `False`: ```py import dlt @@ -252,22 +249,22 @@ print(info) ``` ### ConnectorX -The [`ConnectorX`](https://sfu-db.github.io/connector-x/intro.html) backend completely skips `SQLALchemy` when reading table rows, in favor of doing that in rust. This is claimed to be significantly faster than any other method (validated only on postgres). 
With the default settings it will emit `PyArrow` tables, but you can configure this by specifying the `return_type` in `backend_kwargs`. (See the [`ConnectorX` docs](https://sfu-db.github.io/connector-x/api.html) for a full list of configurable parameters.) +The [`ConnectorX`](https://sfu-db.github.io/connector-x/intro.html) backend completely skips `SQLALchemy` when reading table rows, in favor of doing that in Rust. This is claimed to be significantly faster than any other method (validated only on PostgreSQL). With the default settings, it will emit `PyArrow` tables, but you can configure this by specifying the `return_type` in `backend_kwargs`. (See the [`ConnectorX` docs](https://sfu-db.github.io/connector-x/api.html) for a full list of configurable parameters.) There are certain limitations when using this backend: -* it will ignore `chunk_size`. `ConnectorX` cannot yield data in batches. -* in many cases it requires a connection string that differs from the `SQLAlchemy` connection string. Use the `conn` argument in `backend_kwargs` to set this. -* it will convert **decimals** to **doubles**, so you will lose precision. -* nullability of the columns is ignored (always true) -* it uses different mappings for each data type. (Check [here](https://sfu-db.github.io/connector-x/databases.html) for more details.) -* JSON fields (at least those coming from postgres) are double wrapped in strings. To unwrap this, you can pass the in-built transformation function `unwrap_json_connector_x` (for example, with `add_map`): +* It will ignore `chunk_size`. `ConnectorX` cannot yield data in batches. +* In many cases, it requires a connection string that differs from the `SQLAlchemy` connection string. Use the `conn` argument in `backend_kwargs` to set this. +* It will convert **decimals** to **doubles**, so you will lose precision. +* Nullability of the columns is ignored (always true). +* It uses different mappings for each data type. (Check [here](https://sfu-db.github.io/connector-x/databases.html) for more details.) +* JSON fields (at least those coming from PostgreSQL) are double-wrapped in strings. To unwrap this, you can pass the in-built transformation function `unwrap_json_connector_x` (for example, with `add_map`): ```py from dlt.sources.sql_database.helpers import unwrap_json_connector_x ``` :::note -`dlt` will still use the data types refected from the source database when creating destination tables. It is up to the destination to reconcile / parse type differences. Please note that you'll still lose precision on decimals with default settings. +`dlt` will still use the data types reflected from the source database when creating destination tables. It is up to the destination to reconcile/parse type differences. Please note that you'll still lose precision on decimals with default settings. ::: ```py @@ -286,7 +283,7 @@ unsw_table = sql_table( backend="connectorx", # keep source data types reflection_level="full_with_precision", - # just to demonstrate how to setup a separate connection string for connectorx + # just to demonstrate how to set up a separate connection string for connectorx backend_kwargs={"conn": "postgresql://loader:loader@localhost:5432/dlt_data"} ) @@ -305,4 +302,5 @@ info = pipeline.run( ) print(info) ``` -With the dataset above and a local postgres instance, the `ConnectorX` backend is 2x faster than the `PyArrow` backend. +With the dataset above and a local PostgreSQL instance, the `ConnectorX` backend is 2x faster than the `PyArrow` backend. 
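As a sketch of the `unwrap_json_connector_x` helper mentioned above: assuming the helper takes the name of the JSON column and returns an item transform (the column name `properties` is hypothetical), it could be attached with `add_map` like this:

```py
from dlt.sources.sql_database import sql_table
from dlt.sources.sql_database.helpers import unwrap_json_connector_x

# Hypothetical table with a JSON column called "properties";
# credentials are read from secrets.toml as usual
chat_message = sql_table(
    table="chat_message",
    backend="connectorx",
).add_map(unwrap_json_connector_x("properties"))
```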
+ diff --git a/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md b/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md index d9aae62f94..65c937ef77 100644 --- a/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md +++ b/docs/website/docs/dlt-ecosystem/visualizations/exploring-the-data.md @@ -31,7 +31,7 @@ pipeline and hide many intricacies of correctly setting up the connection to you ### Querying the data using the `dlt` SQL client Execute any SQL query and get results following the Python -[dbapi](https://peps.python.org/pep-0249/) spec. Below we fetch data from the customers table: +[dbapi](https://peps.python.org/pep-0249/) spec. Below, we fetch data from the customers table: ```py pipeline = dlt.pipeline(destination="bigquery", dataset_name="crm") @@ -40,17 +40,17 @@ with pipeline.sql_client() as client: "SELECT id, name, email FROM customers WHERE id = %s", 10 ) as cursor: - # get all data from the cursor as list of rows + # get all data from the cursor as a list of rows print(cursor.fetchall()) ``` -In the above, we used `dbapi` parameters placeholders and fetched the data using `fetchall` method +In the above, we used `dbapi` parameter placeholders and fetched the data using the `fetchall` method that reads all the rows from the cursor. ### Querying data into a data frame -You can fetch results of any SQL query as a data frame. If the destination is supporting that -natively (i.e. BigQuery and DuckDB), `dlt` uses the native method. Thanks to that, reading data +You can fetch the results of any SQL query as a data frame. If the destination supports that +natively (i.e., BigQuery and DuckDB), `dlt` uses the native method. Thanks to that, reading data frames may be really fast! The example below reads GitHub reactions data from the `issues` table and counts reaction types. @@ -65,18 +65,18 @@ with pipeline.sql_client() as client: with client.execute_query( 'SELECT "reactions__+1", "reactions__-1", reactions__laugh, reactions__hooray, reactions__rocket FROM issues' ) as table: - # calling `df` on a cursor, returns the data as a DataFrame + # calling `df` on a cursor returns the data as a DataFrame reactions = table.df() counts = reactions.sum(0).sort_values(0, ascending=False) ``` -The `df` method above returns all the data in the cursor as data frame. You can also fetch data in -chunks by passing `chunk_size` argument to the `df` method. +The `df` method above returns all the data in the cursor as a data frame. You can also fetch data in +chunks by passing the `chunk_size` argument to the `df` method. ### Access destination native connection The native connection to your destination like BigQuery `Client` or DuckDB `DuckDBPyConnection` is -available in case you want to do anything special. Below we take the native connection to `duckdb` +available in case you want to do anything special. Below, we take the native connection to `duckdb` to get `DuckDBPyRelation` from a query: ```py @@ -90,7 +90,7 @@ with pipeline.sql_client() as client: rel.limit(3).show() ``` -## Data Quality Dashboards +## Data quality dashboards After deploying a `dlt` pipeline, you might ask yourself: How can we know if the data is and remains high quality? @@ -108,38 +108,21 @@ any gaps or loading issues. ### Data usage as monitoring -Setting up monitoring is a good idea. However, in practice, often by the time you notice something -is wrong through reviewing charts, someone in the business has likely already noticed something is -wrong. 
That is, if there is usage of the data, then that usage will act as sort of monitoring. +Setting up monitoring is a good idea. However, in practice, often by the time you notice something is wrong through reviewing charts, someone in the business has likely already noticed something is wrong. That is, if there is usage of the data, then that usage will act as a sort of monitoring. -### Plotting main metrics on the line charts +### Plotting main metrics on line charts -In cases where data is not being used that much (e.g. only one marketing analyst is using some data -alone), then it is a good idea to have them plot their main metrics on "last 7 days" line charts, so -it's visible to them that something may be off when they check their metrics. +In cases where data is not being used much (e.g., only one marketing analyst is using some data alone), it is a good idea to have them plot their main metrics on "last 7 days" line charts, so it's visible to them that something may be off when they check their metrics. -It's important to think about granularity here. A daily line chart, for example, would not catch -hourly issues well. Typically, you will want to match the granularity of the time dimension -(day/hour/etc.) of the line chart with the things that could go wrong, either in the loading process -or in the tracked process. +It's important to think about granularity here. A daily line chart, for example, would not catch hourly issues well. Typically, you will want to match the granularity of the time dimension (day/hour/etc.) of the line chart with the things that could go wrong, either in the loading process or in the tracked process. -If a dashboard is the main product of an analyst, they will generally watch it closely. Therefore, -it's probably not necessary for a data engineer to include monitoring in their daily activities in -these situations. +If a dashboard is the main product of an analyst, they will generally watch it closely. Therefore, it's probably not necessary for a data engineer to include monitoring in their daily activities in these situations. ## Tools to create dashboards -[Metabase](https://www.metabase.com/), [Looker Studio](https://lookerstudio.google.com/u/0/), and -[Streamlit](https://streamlit.io/) are some common tools that you might use to set up dashboards to -explore data. It's worth noting that while many tools are suitable for exploration, different tools -enable your organization to achieve different things. Some organizations use multiple tools for -different scopes: - -- Tools like [Metabase](https://www.metabase.com/) are intended for data democratization, where the - business user can change the dimension or granularity to answer follow-up questions. -- Tools like [Looker Studio](https://lookerstudio.google.com/u/0/) and - [Tableau](https://www.tableau.com/) are intended for minimal interaction curated dashboards that - business users can filter and read as-is with limited training. -- Tools like [Streamlit](https://streamlit.io/) enable powerful customizations and the building of - complex apps by Python-first developers, but they generally do not support self-service out of the - box. +[Metabase](https://www.metabase.com/), [Looker Studio](https://lookerstudio.google.com/u/0/), and [Streamlit](https://streamlit.io/) are some common tools that you might use to set up dashboards to explore data. It's worth noting that while many tools are suitable for exploration, different tools enable your organization to achieve different things.
Some organizations use multiple tools for different scopes: + +- Tools like [Metabase](https://www.metabase.com/) are intended for data democratization, where the business user can change the dimension or granularity to answer follow-up questions. +- Tools like [Looker Studio](https://lookerstudio.google.com/u/0/) and [Tableau](https://www.tableau.com/) are intended for minimal-interaction curated dashboards that business users can filter and read as-is with limited training. +- Tools like [Streamlit](https://streamlit.io/) enable powerful customizations and the building of complex apps by Python-first developers, but they generally do not support self-service out of the box. + diff --git a/docs/website/docs/examples/index.md b/docs/website/docs/examples/index.md index 5be3fd1632..b0b16e274d 100644 --- a/docs/website/docs/examples/index.md +++ b/docs/website/docs/examples/index.md @@ -1,14 +1,15 @@ --- title: Code Examples -description: A list of comprehensive code examples that teach you how to solve real world problem. +description: A list of comprehensive code examples that teach you how to solve real-world problems. keywords: ['examples'] --- import DocCardList from '@theme/DocCardList'; -A list of comprehensive code examples that teach you how to solve a real world problem. +A list of comprehensive code examples that teach you how to solve real-world problems. :::info If you want to share your example, follow this [contributing](https://github.com/dlt-hub/dlt/tree/devel/docs/examples/CONTRIBUTING.md) tutorial. ::: - \ No newline at end of file + + diff --git a/docs/website/docs/reference/command-line-interface.md b/docs/website/docs/reference/command-line-interface.md index 14fadba74d..e29b43bcba 100644 --- a/docs/website/docs/reference/command-line-interface.md +++ b/docs/website/docs/reference/command-line-interface.md @@ -9,37 +9,37 @@ keywords: [command line interface, cli, dlt init] ```sh dlt init ``` -This command creates new dlt pipeline script that loads data from `source` to `destination` to it. When you run the command: -1. It creates basic project structure if the current folder is empty. Adds `.dlt/config.toml` and `.dlt/secrets.toml` and `.gitignore` files. -2. It checks if `source` argument is matching one of our [verified sources](../dlt-ecosystem/verified-sources/) and if it is so, [it adds it to the project](../walkthroughs/add-a-verified-source.md). -3. If the `source` is unknown it will use a [generic template](https://github.com/dlt-hub/python-dlt-init-template) to [get you started](../walkthroughs/create-a-pipeline.md). +This command creates a new dlt pipeline script that loads data from `source` to `destination`. When you run the command: +1. It creates a basic project structure if the current folder is empty, adding `.dlt/config.toml`, `.dlt/secrets.toml`, and `.gitignore` files. +2. It checks if the `source` argument matches one of our [verified sources](../dlt-ecosystem/verified-sources/) and, if so, [adds it to the project](../walkthroughs/add-a-verified-source.md). +3. If the `source` is unknown, it will use a [generic template](https://github.com/dlt-hub/python-dlt-init-template) to [get you started](../walkthroughs/create-a-pipeline.md). 4. It will rewrite the pipeline scripts to use your `destination`. 5. It will create sample config and credentials in `secrets.toml` and `config.toml` for the specified source and destination. -6. It will create `requirements.txt` with dependencies required by source and destination.
If one exists, it will print instructions what to add to it. +6. It will create `requirements.txt` with dependencies required by the source and destination. If one exists, it will print instructions on what to add to it. -This command can be used several times in the same folders to add more sources, destinations and pipelines. It will also update the verified source code to the newest -version if run again with existing `source` name. You are warned if files will be overwritten or if `dlt` version needs upgrade to run particular pipeline. +This command can be used several times in the same folder to add more sources, destinations, and pipelines. It will also update the verified source code to the newest +version if run again with an existing `source` name. You are warned if files will be overwritten or if the `dlt` version needs an upgrade to run a particular pipeline. -### Specify your own "verified sources" repository. -You can use `--location ` option to specify your own repository with sources. Typically you would [fork ours](https://github.com/dlt-hub/verified-sources) and start customizing and adding sources ie. to use them for your team or organization. You can also specify a branch with `--branch ` ie. to test a version being developed. +### Specify your own "verified sources" repository +You can use the `--location ` option to specify your own repository with sources. Typically, you would [fork ours](https://github.com/dlt-hub/verified-sources) and start customizing and adding sources, e.g., to use them for your team or organization. You can also specify a branch with `--branch `, e.g., to test a version being developed. ### List all sources ```sh dlt init --list-sources ``` -Shows all available verified sources and their short descriptions. For each source, checks if your local `dlt` version requires update +Shows all available verified sources and their short descriptions. For each source, it checks if your local `dlt` version requires an update and prints the relevant warning. ## `dlt deploy` -This command prepares your pipeline for deployment and gives you step by step instruction how to accomplish it. To enabled this functionality please first execute +This command prepares your pipeline for deployment and gives you step-by-step instructions on how to accomplish it. To enable this functionality, please first execute ```sh pip install "dlt[cli]" ``` -that will add additional packages to current environment. +that will add additional packages to the current environment. > 💡 We ask you to install those dependencies separately to keep our core library small and make it work everywhere. -### github-action +### `github-action` ```sh dlt deploy