diff --git a/.github/workflows/test_doc_snippets.yml b/.github/workflows/test_doc_snippets.yml
index d5091e0fdb..b2a2f241db 100644
--- a/.github/workflows/test_doc_snippets.yml
+++ b/.github/workflows/test_doc_snippets.yml
@@ -18,6 +18,9 @@ env:
DESTINATION__WEAVIATE__VECTORIZER: text2vec-contextionary
DESTINATION__WEAVIATE__MODULE_CONFIG: "{\"text2vec-contextionary\": {\"vectorizeClassName\": false, \"vectorizePropertyName\": true}}"
+ # zendesk vars for example
+ SOURCES__ZENDESK__CREDENTIALS: ${{ secrets.ZENDESK__CREDENTIALS }}
+
jobs:
run_lint:
diff --git a/.gitignore b/.gitignore
index 4a52ad15e8..3604d9b1bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ experiments/*
# !experiments/pipeline/
# !experiments/pipeline/*
secrets.toml
+!docs/**/secrets.toml
*.session.sql
*.duckdb
*.wal
diff --git a/Makefile b/Makefile
index 8a7f14a5db..bd522c9ba3 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,8 @@ help:
@echo " tests all components unsing local destinations: duckdb and postgres"
@echo " test-common"
@echo " tests common components"
+ @echo " test-and-lint-snippets"
+ @echo " tests and lints snippets and examples in docs"
@echo " build-library"
@echo " makes dev and then builds dlt package for distribution"
@echo " publish-library"
diff --git a/dlt/common/schema/schema.py b/dlt/common/schema/schema.py
index 2889d776c5..df1f2dc491 100644
--- a/dlt/common/schema/schema.py
+++ b/dlt/common/schema/schema.py
@@ -420,7 +420,7 @@ def _coerce_non_null_value(self, table_columns: TTableSchemaColumns, table_name:
raise CannotCoerceColumnException(table_name, col_name, py_type, table_columns[col_name]["data_type"], v)
# otherwise we must create variant extension to the table
# pass final=True so no more auto-variants can be created recursively
- # TODO: generate callback so DLT user can decide what to do
+ # TODO: generate callback so dlt user can decide what to do
variant_col_name = self.naming.shorten_fragments(col_name, VARIANT_FIELD_FORMAT % py_type)
return self._coerce_non_null_value(table_columns, table_name, variant_col_name, v, is_variant=True)
diff --git a/dlt/common/storages/exceptions.py b/dlt/common/storages/exceptions.py
index cab149c22c..3203191cd8 100644
--- a/dlt/common/storages/exceptions.py
+++ b/dlt/common/storages/exceptions.py
@@ -51,7 +51,7 @@ class SchemaStorageException(StorageException):
class InStorageSchemaModified(SchemaStorageException):
def __init__(self, schema_name: str, storage_path: str) -> None:
- msg = f"Schema {schema_name} in {storage_path} was externally modified. This is not allowed as that would prevent correct version tracking. Use import/export capabilities of DLT to provide external changes."
+ msg = f"Schema {schema_name} in {storage_path} was externally modified. This is not allowed as that would prevent correct version tracking. Use import/export capabilities of dlt to provide external changes."
super().__init__(msg)
diff --git a/dlt/helpers/airflow_helper.py b/dlt/helpers/airflow_helper.py
index 3cdda43f8b..2a9c76cc76 100644
--- a/dlt/helpers/airflow_helper.py
+++ b/dlt/helpers/airflow_helper.py
@@ -38,7 +38,7 @@
class PipelineTasksGroup(TaskGroup):
"""
- Represents a DLT Airflow pipeline task group.
+ Represents a dlt Airflow pipeline task group.
"""
def __init__(
diff --git a/dlt/helpers/streamlit_helper.py b/dlt/helpers/streamlit_helper.py
index 52584996cf..a8881563fb 100644
--- a/dlt/helpers/streamlit_helper.py
+++ b/dlt/helpers/streamlit_helper.py
@@ -208,7 +208,7 @@ def write_data_explorer_page(pipeline: Pipeline, schema_name: str = None, show_d
#### Args:
pipeline (Pipeline): Pipeline instance to use.
schema_name (str, optional): Name of the schema to display. If None, default schema is used.
- show_dlt_tables (bool, optional): Should show DLT internal tables. Defaults to False.
+ show_dlt_tables (bool, optional): Should show dlt internal tables. Defaults to False.
example_query (str, optional): Example query to be displayed in the SQL Query box.
show_charts (bool, optional): Should automatically show charts for the queries from SQL Query box. Defaults to True.
diff --git a/docs/examples/.dlt/secrets.toml b/docs/examples/.dlt/secrets.toml
new file mode 100644
index 0000000000..6593439cc0
--- /dev/null
+++ b/docs/examples/.dlt/secrets.toml
@@ -0,0 +1,26 @@
+# here is a file with the secrets for all the example pipelines in the `examples` folder
+
+[sources]
+# redshift password for query tables example
+query_table.credentials.password="8P5gyDPNo9zo582rQG6a"
+query_sql.credentials.password="8P5gyDPNo9zo582rQG6a"
+
+# google sheets example
+[sources.google_spreadsheet.credentials]
+project_id="chat-analytics-317513"
+client_email="loader@chat-analytics-317513.iam.gserviceaccount.com"
+private_key="-----BEGIN PRIVATE KEY-----\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD+S\n7inCg8CdRKEMZ/q7Rv5uUiTyUMjQLNXySOPRSSJBSXBPpLJPbcmfxCYgOPWadA3F\noa54WJFR3Uxd+SjAC848dGz5+JEL5u2rHcjzL1IbjDd5oH9rap/QxYm/R9Q5eSdC\nlGiiFh4zH+U9nhWWUovn+ofixbQkhrMFOfgHt+Jvdh/2m7Sdz47itjWFC258R1Ki\nH9vPVtHxw0LrcUZN7HACV3NICRUkvX8U2+JC25esmPxc/qqmxoFlQ7ono/NeMjQa\nq2TyTyNSh4pDtf30PAW4cU2AUbtaSPhIdRloDuwzW9d88VUUbVelqHHeJRgGjkSX\nQz2MCBuFAgMBAAECgf8zlepWYEgm8xtdim2ydB3mdhR/zoZmZM/Q1NthJ8u/IbdO\nb2HPEXxGbDKIIJzhAA17Un98leBLwYKuLZhOpdB+igyJlTG8XlCRF88XiUosJWR3\niHmiuMkndHA7WyTXDc0n3GpUFYWkGGng2cKLx7V/OFmpMwhC9LEKMNOrBKnf9U6Z\n/9nanIerFZU4m5mWbNW/ZRc+qvd+1zGw/JYM6ntdkKLo/TwNOmOS5FS01yLvx7Xw\nm12f9I3VceGXWyrYEh+UCWk0gsEb8xnGGZKy3op5I6trsXzH8I3HCXvapkeWSaFe\n/gmT3CHZIK9hang6f4yMG+niuNtZE2/METgvcjkCgYEAwTg1SZAYSaL6LoFV92Kq\nyHV0DP8KivDIKrByOym9oikPK/2ZMNi9NivVmSALuR54wj7pFxFmyEj6UTklSeLb\nRvOjcPnZEMbFspRHIzkySfsnfScoHZXOeakjOub1K5FehYsLXQIfe7iwRg/mcd/2\noFVyJrm2aNXcvNuug4scEE0CgYEAuuaRmGY5mKv+viuZ/zzOky7IjDnp4w2BMJt0\noMXznKuLHJpexnQ9A/ZHxpAp6Bi6Glk0XLi2uaI+ggXlEUfNa3DHMQu7xg1RaCqN\n957WGRO0ETtIWdp1BHhWPtT5kdOrjSZy9vRSZ0vh2WnZe5SgKRVCqQsV7ExcEltz\nUc9WlBkCgYA9MaQOzEgk6iz6FZQ4aVNVcX1zsEKShneerYtAGZQpi392mzatNbeX\nNILNoEyWMIRmYK5J1AUNYa+FkeexYtu3uOoGmdqZaZqrWDK/gRngPF7hUElwNUXT\nWjICMatsRPn+qW7L4iQ+dtu9FMQTRK9DUEx6305aHYFvftPibWhR8QKBgQCAd3GG\nNmXKihaMsr2kUjCPvG1+7WPVfHfbaE9PHyFnBAaXv4f7kvRJn+QQGRGlBjINYFl8\njj6S9HFQwCqGqTsKabeQ/8auyIK3PeDdXqE9FW0FFyGRGXarfueRQqTU1pCpcc89\n7gwiEmeIIJiruCoqcwGh3gvQo1/6AkAO8JxLKQKBgF0T8P0hRctXFejcFf/4EikS\n2+WA/gNSQITC1m+8nWNnU+bDmRax+pIkzlvjkG5kyNfWvB7i2A5Y5OnCo92y5aDQ\nzbGHLwZj0HXqLFXhbAv/0xZPXlZ71NWpi2BpCJRnzU65ftsjePfydfvN6g4mPQ28\nkHQsYKUZk5HPC8FlPvQe\n-----END PRIVATE KEY-----\n"
+
+[destination]
+# all postgres destinations for all examples
+postgres.credentials = "postgres://loader:loader@localhost:5432/dlt_data"
+# all redshift destinations for all examples
+redshift.credentials = "postgres://loader:8P5gyDPNo9zo582rQG6a@chat-analytics.czwteevq7bpe.eu-central-1.redshift.amazonaws.com:5439/chat_analytics_rasa"
+
+# all the bigquery destinations
+[destination.bigquery.credentials]
+project_id="chat-analytics-317513"
+client_email="loader@chat-analytics-317513.iam.gserviceaccount.com"
+private_key="-----BEGIN PRIVATE KEY-----\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD+S\n7inCg8CdRKEMZ/q7Rv5uUiTyUMjQLNXySOPRSSJBSXBPpLJPbcmfxCYgOPWadA3F\noa54WJFR3Uxd+SjAC848dGz5+JEL5u2rHcjzL1IbjDd5oH9rap/QxYm/R9Q5eSdC\nlGiiFh4zH+U9nhWWUovn+ofixbQkhrMFOfgHt+Jvdh/2m7Sdz47itjWFC258R1Ki\nH9vPVtHxw0LrcUZN7HACV3NICRUkvX8U2+JC25esmPxc/qqmxoFlQ7ono/NeMjQa\nq2TyTyNSh4pDtf30PAW4cU2AUbtaSPhIdRloDuwzW9d88VUUbVelqHHeJRgGjkSX\nQz2MCBuFAgMBAAECgf8zlepWYEgm8xtdim2ydB3mdhR/zoZmZM/Q1NthJ8u/IbdO\nb2HPEXxGbDKIIJzhAA17Un98leBLwYKuLZhOpdB+igyJlTG8XlCRF88XiUosJWR3\niHmiuMkndHA7WyTXDc0n3GpUFYWkGGng2cKLx7V/OFmpMwhC9LEKMNOrBKnf9U6Z\n/9nanIerFZU4m5mWbNW/ZRc+qvd+1zGw/JYM6ntdkKLo/TwNOmOS5FS01yLvx7Xw\nm12f9I3VceGXWyrYEh+UCWk0gsEb8xnGGZKy3op5I6trsXzH8I3HCXvapkeWSaFe\n/gmT3CHZIK9hang6f4yMG+niuNtZE2/METgvcjkCgYEAwTg1SZAYSaL6LoFV92Kq\nyHV0DP8KivDIKrByOym9oikPK/2ZMNi9NivVmSALuR54wj7pFxFmyEj6UTklSeLb\nRvOjcPnZEMbFspRHIzkySfsnfScoHZXOeakjOub1K5FehYsLXQIfe7iwRg/mcd/2\noFVyJrm2aNXcvNuug4scEE0CgYEAuuaRmGY5mKv+viuZ/zzOky7IjDnp4w2BMJt0\noMXznKuLHJpexnQ9A/ZHxpAp6Bi6Glk0XLi2uaI+ggXlEUfNa3DHMQu7xg1RaCqN\n957WGRO0ETtIWdp1BHhWPtT5kdOrjSZy9vRSZ0vh2WnZe5SgKRVCqQsV7ExcEltz\nUc9WlBkCgYA9MaQOzEgk6iz6FZQ4aVNVcX1zsEKShneerYtAGZQpi392mzatNbeX\nNILNoEyWMIRmYK5J1AUNYa+FkeexYtu3uOoGmdqZaZqrWDK/gRngPF7hUElwNUXT\nWjICMatsRPn+qW7L4iQ+dtu9FMQTRK9DUEx6305aHYFvftPibWhR8QKBgQCAd3GG\nNmXKihaMsr2kUjCPvG1+7WPVfHfbaE9PHyFnBAaXv4f7kvRJn+QQGRGlBjINYFl8\njj6S9HFQwCqGqTsKabeQ/8auyIK3PeDdXqE9FW0FFyGRGXarfueRQqTU1pCpcc89\n7gwiEmeIIJiruCoqcwGh3gvQo1/6AkAO8JxLKQKBgF0T8P0hRctXFejcFf/4EikS\n2+WA/gNSQITC1m+8nWNnU+bDmRax+pIkzlvjkG5kyNfWvB7i2A5Y5OnCo92y5aDQ\nzbGHLwZj0HXqLFXhbAv/0xZPXlZ71NWpi2BpCJRnzU65ftsjePfydfvN6g4mPQ28\nkHQsYKUZk5HPC8FlPvQe\n-----END PRIVATE KEY-----\n"
+
+
diff --git a/docs/examples/.dlt/example.secrets.toml b/docs/examples/archive/.dlt/example.secrets.toml
similarity index 100%
rename from docs/examples/.dlt/example.secrets.toml
rename to docs/examples/archive/.dlt/example.secrets.toml
diff --git a/docs/examples/README.md b/docs/examples/archive/README.md
similarity index 100%
rename from docs/examples/README.md
rename to docs/examples/archive/README.md
diff --git a/docs/examples/__init__.py b/docs/examples/archive/__init__.py
similarity index 100%
rename from docs/examples/__init__.py
rename to docs/examples/archive/__init__.py
diff --git a/docs/examples/_helpers.py b/docs/examples/archive/_helpers.py
similarity index 100%
rename from docs/examples/_helpers.py
rename to docs/examples/archive/_helpers.py
diff --git a/docs/examples/credentials/.dlt/config.toml b/docs/examples/archive/credentials/.dlt/config.toml
similarity index 100%
rename from docs/examples/credentials/.dlt/config.toml
rename to docs/examples/archive/credentials/.dlt/config.toml
diff --git a/docs/examples/credentials/__init__.py b/docs/examples/archive/credentials/__init__.py
similarity index 100%
rename from docs/examples/credentials/__init__.py
rename to docs/examples/archive/credentials/__init__.py
diff --git a/docs/examples/credentials/explicit.py b/docs/examples/archive/credentials/explicit.py
similarity index 100%
rename from docs/examples/credentials/explicit.py
rename to docs/examples/archive/credentials/explicit.py
diff --git a/docs/examples/data/channels.json b/docs/examples/archive/data/channels.json
similarity index 100%
rename from docs/examples/data/channels.json
rename to docs/examples/archive/data/channels.json
diff --git a/docs/examples/data/demo_example.json b/docs/examples/archive/data/demo_example.json
similarity index 100%
rename from docs/examples/data/demo_example.json
rename to docs/examples/archive/data/demo_example.json
diff --git a/docs/examples/data/messages.json b/docs/examples/archive/data/messages.json
similarity index 100%
rename from docs/examples/data/messages.json
rename to docs/examples/archive/data/messages.json
diff --git a/docs/examples/data/rasa_trackers/2888158124550630_tracker.jsonl b/docs/examples/archive/data/rasa_trackers/2888158124550630_tracker.jsonl
similarity index 100%
rename from docs/examples/data/rasa_trackers/2888158124550630_tracker.jsonl
rename to docs/examples/archive/data/rasa_trackers/2888158124550630_tracker.jsonl
diff --git a/docs/examples/data/rasa_trackers/8629c904-0c26-4f0b-927b-14d48db43c28_tracker.jsonl b/docs/examples/archive/data/rasa_trackers/8629c904-0c26-4f0b-927b-14d48db43c28_tracker.jsonl
similarity index 100%
rename from docs/examples/data/rasa_trackers/8629c904-0c26-4f0b-927b-14d48db43c28_tracker.jsonl
rename to docs/examples/archive/data/rasa_trackers/8629c904-0c26-4f0b-927b-14d48db43c28_tracker.jsonl
diff --git a/docs/examples/data/singer_taps/csv_catalog.json b/docs/examples/archive/data/singer_taps/csv_catalog.json
similarity index 100%
rename from docs/examples/data/singer_taps/csv_catalog.json
rename to docs/examples/archive/data/singer_taps/csv_catalog.json
diff --git a/docs/examples/data/singer_taps/model_annotations.csv b/docs/examples/archive/data/singer_taps/model_annotations.csv
similarity index 100%
rename from docs/examples/data/singer_taps/model_annotations.csv
rename to docs/examples/archive/data/singer_taps/model_annotations.csv
diff --git a/docs/examples/data/singer_taps/tap_google_sheet.jsonl b/docs/examples/archive/data/singer_taps/tap_google_sheet.jsonl
similarity index 100%
rename from docs/examples/data/singer_taps/tap_google_sheet.jsonl
rename to docs/examples/archive/data/singer_taps/tap_google_sheet.jsonl
diff --git a/docs/examples/data/singer_taps/tap_hubspot.jsonl b/docs/examples/archive/data/singer_taps/tap_hubspot.jsonl
similarity index 100%
rename from docs/examples/data/singer_taps/tap_hubspot.jsonl
rename to docs/examples/archive/data/singer_taps/tap_hubspot.jsonl
diff --git a/docs/examples/dbt_run_jaffle.py b/docs/examples/archive/dbt_run_jaffle.py
similarity index 100%
rename from docs/examples/dbt_run_jaffle.py
rename to docs/examples/archive/dbt_run_jaffle.py
diff --git a/docs/examples/discord_iterator.py b/docs/examples/archive/discord_iterator.py
similarity index 100%
rename from docs/examples/discord_iterator.py
rename to docs/examples/archive/discord_iterator.py
diff --git a/docs/examples/examples/schemas/dlt_quickstart.schema.yaml b/docs/examples/archive/examples/schemas/dlt_quickstart.schema.yaml
similarity index 100%
rename from docs/examples/examples/schemas/dlt_quickstart.schema.yaml
rename to docs/examples/archive/examples/schemas/dlt_quickstart.schema.yaml
diff --git a/docs/examples/google_drive_csv.py b/docs/examples/archive/google_drive_csv.py
similarity index 100%
rename from docs/examples/google_drive_csv.py
rename to docs/examples/archive/google_drive_csv.py
diff --git a/docs/examples/google_sheets.py b/docs/examples/archive/google_sheets.py
similarity index 100%
rename from docs/examples/google_sheets.py
rename to docs/examples/archive/google_sheets.py
diff --git a/docs/examples/quickstart.py b/docs/examples/archive/quickstart.py
similarity index 100%
rename from docs/examples/quickstart.py
rename to docs/examples/archive/quickstart.py
diff --git a/docs/examples/rasa_example.py b/docs/examples/archive/rasa_example.py
similarity index 88%
rename from docs/examples/rasa_example.py
rename to docs/examples/archive/rasa_example.py
index 3dbd61c692..d438ce5e8b 100644
--- a/docs/examples/rasa_example.py
+++ b/docs/examples/archive/rasa_example.py
@@ -3,10 +3,10 @@
import dlt
from dlt.destinations import bigquery, postgres
-from docs.examples.sources.jsonl import jsonl_files
-from docs.examples.sources.rasa import rasa
+from .sources.jsonl import jsonl_files
+from .sources.rasa import rasa
-from docs.examples._helpers import pub_bigquery_credentials
+from ._helpers import pub_bigquery_credentials
# let's load to bigquery, here we provide the credentials for our public project
# credentials = pub_bigquery_credentials
diff --git a/docs/examples/read_table.py b/docs/examples/archive/read_table.py
similarity index 100%
rename from docs/examples/read_table.py
rename to docs/examples/archive/read_table.py
diff --git a/docs/examples/restore_pipeline.py b/docs/examples/archive/restore_pipeline.py
similarity index 100%
rename from docs/examples/restore_pipeline.py
rename to docs/examples/archive/restore_pipeline.py
diff --git a/docs/examples/schemas/__init__.py b/docs/examples/archive/schemas/__init__.py
similarity index 100%
rename from docs/examples/schemas/__init__.py
rename to docs/examples/archive/schemas/__init__.py
diff --git a/docs/examples/schemas/discord.schema.yml b/docs/examples/archive/schemas/discord.schema.yml
similarity index 100%
rename from docs/examples/schemas/discord.schema.yml
rename to docs/examples/archive/schemas/discord.schema.yml
diff --git a/docs/examples/schemas/dlt_quickstart.schema.yaml b/docs/examples/archive/schemas/dlt_quickstart.schema.yaml
similarity index 100%
rename from docs/examples/schemas/dlt_quickstart.schema.yaml
rename to docs/examples/archive/schemas/dlt_quickstart.schema.yaml
diff --git a/docs/examples/schemas/hubspot.schema.yaml b/docs/examples/archive/schemas/hubspot.schema.yaml
similarity index 100%
rename from docs/examples/schemas/hubspot.schema.yaml
rename to docs/examples/archive/schemas/hubspot.schema.yaml
diff --git a/docs/examples/schemas/inferred_demo.schema.yml b/docs/examples/archive/schemas/inferred_demo.schema.yml
similarity index 100%
rename from docs/examples/schemas/inferred_demo.schema.yml
rename to docs/examples/archive/schemas/inferred_demo.schema.yml
diff --git a/docs/examples/singer_tap_example.py b/docs/examples/archive/singer_tap_example.py
similarity index 100%
rename from docs/examples/singer_tap_example.py
rename to docs/examples/archive/singer_tap_example.py
diff --git a/docs/examples/singer_tap_jsonl_example.py b/docs/examples/archive/singer_tap_jsonl_example.py
similarity index 100%
rename from docs/examples/singer_tap_jsonl_example.py
rename to docs/examples/archive/singer_tap_jsonl_example.py
diff --git a/docs/examples/sources/__init__.py b/docs/examples/archive/sources/__init__.py
similarity index 100%
rename from docs/examples/sources/__init__.py
rename to docs/examples/archive/sources/__init__.py
diff --git a/docs/examples/sources/google_sheets.py b/docs/examples/archive/sources/google_sheets.py
similarity index 100%
rename from docs/examples/sources/google_sheets.py
rename to docs/examples/archive/sources/google_sheets.py
diff --git a/docs/examples/sources/jsonl.py b/docs/examples/archive/sources/jsonl.py
similarity index 100%
rename from docs/examples/sources/jsonl.py
rename to docs/examples/archive/sources/jsonl.py
diff --git a/docs/examples/sources/rasa/__init__.py b/docs/examples/archive/sources/rasa/__init__.py
similarity index 100%
rename from docs/examples/sources/rasa/__init__.py
rename to docs/examples/archive/sources/rasa/__init__.py
diff --git a/docs/examples/sources/rasa/rasa.py b/docs/examples/archive/sources/rasa/rasa.py
similarity index 100%
rename from docs/examples/sources/rasa/rasa.py
rename to docs/examples/archive/sources/rasa/rasa.py
diff --git a/docs/examples/sources/rasa/rasa.schema.yaml b/docs/examples/archive/sources/rasa/rasa.schema.yaml
similarity index 100%
rename from docs/examples/sources/rasa/rasa.schema.yaml
rename to docs/examples/archive/sources/rasa/rasa.schema.yaml
diff --git a/docs/examples/sources/singer_tap.py b/docs/examples/archive/sources/singer_tap.py
similarity index 97%
rename from docs/examples/sources/singer_tap.py
rename to docs/examples/archive/sources/singer_tap.py
index 65c9b76e0b..41db2c09f5 100644
--- a/docs/examples/sources/singer_tap.py
+++ b/docs/examples/archive/sources/singer_tap.py
@@ -95,6 +95,6 @@ def singer_messages() -> Iterator[TDataItem]:
os.path.abspath(catalog_file_path),
*state_params
)
- yield from get_source_from_stream(pipe_iterator, state) # type: ignore
+ yield from get_source_from_stream(pipe_iterator, state)
return singer_messages
diff --git a/docs/examples/sources/sql_query.py b/docs/examples/archive/sources/sql_query.py
similarity index 100%
rename from docs/examples/sources/sql_query.py
rename to docs/examples/archive/sources/sql_query.py
diff --git a/docs/examples/sources/stdout.py b/docs/examples/archive/sources/stdout.py
similarity index 100%
rename from docs/examples/sources/stdout.py
rename to docs/examples/archive/sources/stdout.py
diff --git a/docs/examples/sync_schema_example.py b/docs/examples/archive/sync_schema_example.py
similarity index 100%
rename from docs/examples/sync_schema_example.py
rename to docs/examples/archive/sync_schema_example.py
diff --git a/docs/examples/incremental_loading/.dlt/config.toml b/docs/examples/incremental_loading/.dlt/config.toml
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/docs/examples/incremental_loading/.dlt/secrets.toml b/docs/examples/incremental_loading/.dlt/secrets.toml
new file mode 100644
index 0000000000..4dec919c06
--- /dev/null
+++ b/docs/examples/incremental_loading/.dlt/secrets.toml
@@ -0,0 +1,4 @@
+[sources.zendesk.credentials]
+password = ""
+subdomain = ""
+email = ""
\ No newline at end of file
diff --git a/docs/examples/incremental_loading/__init__.py b/docs/examples/incremental_loading/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/docs/examples/incremental_loading/zendesk.py b/docs/examples/incremental_loading/zendesk.py
new file mode 100644
index 0000000000..6370f29811
--- /dev/null
+++ b/docs/examples/incremental_loading/zendesk.py
@@ -0,0 +1,126 @@
+from typing import Iterator, Optional, Dict, Any, Tuple
+
+import dlt
+from dlt.common import pendulum
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime
+from dlt.extract.source import DltResource
+from dlt.sources.helpers.requests import client
+
+
+@dlt.source(max_table_nesting=2)
+def zendesk_support(
+    credentials: Dict[str, str] = dlt.secrets.value,
+ start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008
+ end_date: Optional[TAnyDateTime] = None,
+):
+ """
+    Retrieves data from Zendesk Support for ticket events.
+
+ Args:
+ credentials: Zendesk credentials (default: dlt.secrets.value)
+ start_date: Start date for data extraction (default: 2000-01-01)
+ end_date: End date for data extraction (default: None).
+            If the end date is not provided, incremental loading will be
+            enabled and, after the initial run, only new data will be retrieved.
+
+ Returns:
+ DltResource.
+ """
+ # Convert start_date and end_date to Pendulum datetime objects
+ start_date_obj = ensure_pendulum_datetime(start_date)
+ end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None
+
+ # Convert Pendulum datetime objects to Unix timestamps
+ start_date_ts = start_date_obj.int_timestamp
+ end_date_ts: Optional[int] = None
+ if end_date_obj:
+ end_date_ts = end_date_obj.int_timestamp
+
+ # Extract credentials from secrets dictionary
+ auth = (credentials["email"], credentials["password"])
+ subdomain = credentials["subdomain"]
+ url = f"https://{subdomain}.zendesk.com"
+
+    # we use the `append` write disposition, because objects in the ticket_events endpoint are never updated
+    # so we do not need to merge
+    # we set primary_key to allow deduplication of events by the `incremental` below in the rare case
+    # when two events have the same timestamp
+ @dlt.resource(primary_key="id", write_disposition="append")
+ def ticket_events(
+ timestamp: dlt.sources.incremental[int] = dlt.sources.incremental(
+ "timestamp",
+ initial_value=start_date_ts,
+ end_value=end_date_ts,
+ allow_external_schedulers=True,
+ ),
+ ):
+        # URL for ticket events
+ # 'https://d3v-dlthub.zendesk.com/api/v2/incremental/ticket_events.json?start_time=946684800'
+ event_pages = get_pages(
+ url=url,
+ endpoint="/api/v2/incremental/ticket_events.json",
+ auth=auth,
+ data_point_name="ticket_events",
+ params={"start_time": timestamp.last_value},
+ )
+ for page in event_pages:
+ yield page
+            # stop loading when end_value is used and the end is reached.
+            # unfortunately, the Zendesk API does not have an "end_time" parameter, so we stop iterating ourselves
+ if timestamp.end_out_of_range:
+ return
+
+ return ticket_events
+
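+# A usage sketch (illustrative assumption, not part of this example's __main__ below):
+# passing an explicit `end_date` turns incremental loading into a bounded backfill, e.g.
+#   zendesk_support(
+#       start_date=pendulum.datetime(2023, 1, 1),
+#       end_date=pendulum.datetime(2023, 2, 1),
+#   )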
+
+def get_pages(
+ url: str,
+ endpoint: str,
+ auth: Tuple[str, str],
+ data_point_name: str,
+ params: Optional[Dict[str, Any]] = None,
+):
+ """
+ Makes a request to a paginated endpoint and returns a generator of data items per page.
+
+ Args:
+ url: The base URL.
+        endpoint: The URL path of the endpoint, e.g. /api/v2/calls
+        auth: Credentials for authentication.
+        data_point_name: The key under which data items are nested in the response object (e.g. calls)
+ params: Optional dict of query params to include in the request.
+
+ Returns:
+ Generator of pages, each page is a list of dict data items.
+ """
+ # update the page size to enable cursor pagination
+ params = params or {}
+ params["per_page"] = 1000
+ headers = None
+
+ # make request and keep looping until there is no next page
+ get_url = f"{url}{endpoint}"
+ while get_url:
+ response = client.get(
+ get_url, headers=headers, auth=auth, params=params
+ )
+ response.raise_for_status()
+ response_json = response.json()
+ result = response_json[data_point_name]
+ yield result
+
+ get_url = None
+ # See https://developer.zendesk.com/api-reference/ticketing/ticket-management/incremental_exports/#json-format
+ if not response_json["end_of_stream"]:
+ get_url = response_json["next_page"]
+
+
+if __name__ == "__main__":
+ # create dlt pipeline
+ pipeline = dlt.pipeline(
+ pipeline_name="zendesk", destination="duckdb", dataset_name="zendesk_data"
+ )
+
+ load_info = pipeline.run(zendesk_support())
+ print(load_info)
\ No newline at end of file
diff --git a/docs/examples/transformers/.dlt/config.toml b/docs/examples/transformers/.dlt/config.toml
new file mode 100644
index 0000000000..a366f34edf
--- /dev/null
+++ b/docs/examples/transformers/.dlt/config.toml
@@ -0,0 +1,16 @@
+[runtime]
+log_level="WARNING"
+
+[extract]
+# use 2 workers to extract sources in parallel
+workers=2
+# allow 10 async items to be processed in parallel
+max_parallel_items=10
+
+[normalize]
+# use 3 worker processes to process 3 files in parallel
+workers=3
+
+[load]
+# have 50 concurrent load jobs
+workers=50
\ No newline at end of file
diff --git a/docs/examples/transformers/__init__.py b/docs/examples/transformers/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/docs/examples/transformers/pokemon.py b/docs/examples/transformers/pokemon.py
new file mode 100644
index 0000000000..ce8cc0142c
--- /dev/null
+++ b/docs/examples/transformers/pokemon.py
@@ -0,0 +1,61 @@
+import dlt
+from dlt.sources.helpers import requests
+
+
+@dlt.source(max_table_nesting=2)
+def source(pokemon_api_url: str):
+ """"""
+
+ # note that we deselect `pokemon_list` - we do not want it to be loaded
+ @dlt.resource(write_disposition="replace", selected=False)
+ def pokemon_list():
+ """Retrieve a first page of Pokemons and yield it. We do not retrieve all the pages in this example"""
+ yield requests.get(pokemon_api_url).json()["results"]
+
+ # transformer that retrieves a list of objects in parallel
+ @dlt.transformer
+ def pokemon(pokemons):
+ """Yields details for a list of `pokemons`"""
+
+ # @dlt.defer marks a function to be executed in parallel
+ # in a thread pool
+ @dlt.defer
+ def _get_pokemon(_pokemon):
+ return requests.get(_pokemon["url"]).json()
+
+        # call and yield the function result normally; @dlt.defer takes care of the parallelism
+ for _pokemon in pokemons:
+ yield _get_pokemon(_pokemon)
+
+ # a special case where just one item is retrieved in transformer
+ # a whole transformer may be marked for parallel execution
+ @dlt.transformer
+ @dlt.defer
+ def species(pokemon_details):
+ """Yields species details for a pokemon"""
+ species_data = requests.get(pokemon_details["species"]["url"]).json()
+ # link back to pokemon so we have a relation in loaded data
+ species_data["pokemon_id"] = pokemon_details["id"]
+        # just return the results; if you yield,
+        # the generator will be evaluated in the main thread
+ return species_data
+
+    # create two simple pipelines with the | operator
+    # 1. send the list of pokemons into the `pokemon` transformer to get pokemon details
+    # 2. send the pokemon details into the `species` transformer to get species details
+    # NOTE: dlt is smart enough to extract the pokemon list and the pokemon details only once, even though both pipes use them
+
+ return (
+ pokemon_list | pokemon,
+ pokemon_list | pokemon | species
+ )
+
+if __name__ == "__main__":
+    # build duckdb pipeline
+ pipeline = dlt.pipeline(
+ pipeline_name="pokemon", destination="duckdb", dataset_name="pokemon_data"
+ )
+
+ # the pokemon_list resource does not need to be loaded
+ load_info = pipeline.run(source("https://pokeapi.co/api/v2/pokemon"))
+ print(load_info)
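+    # optionally inspect per-table row counts from the run trace
+    # (an illustrative assumption; requires the run above to have succeeded):
+    # print(pipeline.last_trace.last_normalize_info)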
\ No newline at end of file
diff --git a/docs/examples/transformers_and_parallelism/__init__.py b/docs/examples/transformers_and_parallelism/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/docs/technical/create_pipeline.md b/docs/technical/create_pipeline.md
index 59e5742680..f6603d08b8 100644
--- a/docs/technical/create_pipeline.md
+++ b/docs/technical/create_pipeline.md
@@ -420,12 +420,12 @@ The Python function that yields is not a function but magical object that `dlt`
```python
def lazy_function(endpoint_name):
- # INIT - this will be executed only once when DLT wants!
+ # INIT - this will be executed only once when dlt wants!
get_configuration()
from_item = dlt.current.state.get("last_item", 0)
l = get_item_list_from_api(api_key, endpoint_name)
- # ITERATOR - this will be executed many times also when DLT wants more data!
+ # ITERATOR - this will be executed many times also when dlt wants more data!
for item in l:
yield requests.get(url, api_key, "%s?id=%s" % (endpoint_name, item["id"])).json()
# CLEANUP
diff --git a/docs/website/docs/dlt-ecosystem/destinations/athena.md b/docs/website/docs/dlt-ecosystem/destinations/athena.md
index 275e36736e..74771ba74f 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/athena.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/athena.md
@@ -6,7 +6,7 @@ keywords: [aws, athena, glue catalog]
# AWS Athena / Glue Catalog
-The athena destination stores data as parquet files in s3 buckets and creates [external tables in aws athena](https://docs.aws.amazon.com/athena/latest/ug/creating-tables.html). You can then query those tables with athena sql commands which will then scan the whole folder of parquet files and return the results. This destination works very similar to other sql based destinations, with the exception of the merge write disposition not being supported at this time. DLT metadata will be stored in the same bucket as the parquet files, but as iceberg tables.
+The athena destination stores data as parquet files in s3 buckets and creates [external tables in aws athena](https://docs.aws.amazon.com/athena/latest/ug/creating-tables.html). You can then query those tables with athena sql commands which will scan the whole folder of parquet files and return the results. This destination works very similarly to other sql based destinations, with the exception of the merge write disposition not being supported at this time. dlt metadata will be stored in the same bucket as the parquet files, but as iceberg tables.
## Setup Guide
### 1. Initialize the dlt project
diff --git a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md
index 39720670e2..709686e220 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md
@@ -118,7 +118,7 @@ BigQuery supports the following [column hints](https://dlthub.com/docs/general-u
## Staging Support
-BigQuery supports gcs as a file staging destination. DLT will upload files in the parquet format to gcs and ask BigQuery to copy their data directly into the db. Please refer to the [Google Storage filesystem documentation](./filesystem.md#google-storage) to learn how to set up your gcs bucket with the bucket_url and credentials. If you use the same service account for gcs and your redshift deployment, you do not need to provide additional authentication for BigQuery to be able to read from your bucket.
+BigQuery supports gcs as a file staging destination. dlt will upload files in the parquet format to gcs and ask BigQuery to copy their data directly into the db. Please refer to the [Google Storage filesystem documentation](./filesystem.md#google-storage) to learn how to set up your gcs bucket with the bucket_url and credentials. If you use the same service account for gcs and your BigQuery deployment, you do not need to provide additional authentication for BigQuery to be able to read from your bucket.
```toml
```
diff --git a/docs/website/docs/dlt-ecosystem/destinations/redshift.md b/docs/website/docs/dlt-ecosystem/destinations/redshift.md
index 7b0f5daaea..ff29407d48 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/redshift.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/redshift.md
@@ -90,7 +90,7 @@ Amazon Redshift supports the following column hints:
## Staging support
-Redshift supports s3 as a file staging destination. DLT will upload files in the parquet format to s3 and ask redshift to copy their data directly into the db. Please refere to the [S3 documentation](./filesystem.md#aws-s3) to learn how to set up your s3 bucket with the bucket_url and credentials. The `dlt` Redshift loader will use the aws credentials provided for s3 to access the s3 bucket if not specified otherwise (see config options below). Alternatively to parquet files, you can also specify jsonl as the staging file format. For this set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`.
+Redshift supports s3 as a file staging destination. dlt will upload files in the parquet format to s3 and ask redshift to copy their data directly into the db. Please refer to the [S3 documentation](./filesystem.md#aws-s3) to learn how to set up your s3 bucket with the bucket_url and credentials. The `dlt` Redshift loader will use the aws credentials provided for s3 to access the s3 bucket if not specified otherwise (see config options below). Alternatively to parquet files, you can also specify jsonl as the staging file format. For this set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`.
### Authentication iam Role
diff --git a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md
index 7df4275898..47d3aabf26 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md
@@ -126,7 +126,7 @@ Names of tables and columns in [schemas](../../general-usage/schema.md) are kept
## Staging support
-Snowflake supports s3 and gcs as a file staging destinations. DLT will upload files in the parquet format to the bucket provider and will ask snowflake to copy their data directly into the db.
+Snowflake supports s3 and gcs as file staging destinations. dlt will upload files in the parquet format to the bucket provider and will ask snowflake to copy their data directly into the db.
Alternavitely to parquet files, you can also specify jsonl as the staging file format. For this set the `loader_file_format` argument of the `run` command of the pipeline to `jsonl`.
diff --git a/docs/website/docs/dlt-ecosystem/staging.md b/docs/website/docs/dlt-ecosystem/staging.md
index 7cf6bfd30d..d2ed03a2a2 100644
--- a/docs/website/docs/dlt-ecosystem/staging.md
+++ b/docs/website/docs/dlt-ecosystem/staging.md
@@ -7,7 +7,7 @@ keywords: [staging, destination]
The goal of staging is to bring the data closer to the database engine so the modification of the destination (final) dataset happens faster and without errors. `dlt`, when asked, creates two
staging areas:
-1. A **staging dataset** used by the [merge and replace loads](../general-usage/incremental-loading.md#merge-incremental_loading) to deduplicate and merge data with the destination. Such staging dataset has the same name as the dataset passed to `dlt.pipeline` but with `_staging` suffix in the name. As a user you typically never see and directly interact with it.
+1. A **staging dataset** used by the [merge and replace loads](../general-usage/incremental-loading.md#merge-incremental-loading) to deduplicate and merge data with the destination. Such staging dataset has the same name as the dataset passed to `dlt.pipeline` but with `_staging` suffix in the name. As a user you typically never see and directly interact with it.
2. A **staging storage** which is typically a s3/gcp bucket where [loader files](file-formats/) are copied before they are loaded by the destination.
## Staging storage
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/github.md b/docs/website/docs/dlt-ecosystem/verified-sources/github.md
index 8ffe3af68c..f68d872493 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/github.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/github.md
@@ -233,7 +233,7 @@ def repo_events(
dlt.sources.incremental. If no value is given, the default "initial_value" is used. The function
"last_value_func" determines the most recent 'created_at' value.
-Read more about [incremental loading](../../general-usage/incremental-loading#incremental_loading-with-last-value).
+Read more about [incremental loading](../../general-usage/incremental-loading#incremental-loading-with-last-value).
## Customization
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md
index 15b79850f2..aaf823b702 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md
@@ -380,7 +380,7 @@ dlt.resource(
`merge_key`: Parameter is used to specify the column used to identify records for merging. In this
case,"spreadsheet_id", means that the records will be merged based on the values in this column.
-[Read more](https://dlthub.com/docs/general-usage/incremental-loading#merge-incremental_loading).
+[Read more](https://dlthub.com/docs/general-usage/incremental-loading#merge-incremental-loading).
### Create your own pipeline
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md b/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md
index 836b491c50..fa432f7e52 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md
@@ -231,4 +231,4 @@ To create your data pipeline using single loading and [incremental data loading
6. It's important to keep the pipeline name and destination dataset name unchanged. The pipeline name is crucial for retrieving the [state](https://dlthub.com/docs/general-usage/state) of the last pipeline run, which includes the end date needed for loading data incrementally. Modifying these names can lead to [“full_refresh”](https://dlthub.com/docs/general-usage/pipeline#do-experiments-with-full-refresh) which will disrupt the tracking of relevant metadata(state) for [incremental data loading](https://dlthub.com/docs/general-usage/incremental-loading).
-That's it! Enjoy running your Stripe DLT pipeline!
+That's it! Enjoy running your Stripe dlt pipeline!
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md
index dd25fd51f7..419d2ec924 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md
@@ -226,4 +226,4 @@ To create your data pipeline using single loading and [incremental data loading]
> Note: In the pipeline run mentioned above, we are loading two [dependent endpoints](workable.md#dependent-endpoints). “jobs_activities” and “jobs_application_form”. To ensure that these endpoints are loaded properly, the **`load_details`** parameter is set as **`True`**.
-That’s it! Enjoy running your Workable DLT pipeline!
+That’s it! Enjoy running your Workable dlt pipeline!
diff --git a/docs/website/docs/examples/_examples-header.md b/docs/website/docs/examples/_examples-header.md
new file mode 100644
index 0000000000..9840b00d29
--- /dev/null
+++ b/docs/website/docs/examples/_examples-header.md
@@ -0,0 +1,21 @@
+import Admonition from "@theme/Admonition";
+import CodeBlock from '@theme/CodeBlock';
+
+