diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml
new file mode 100644
index 0000000000..00e7462311
--- /dev/null
+++ b/.github/workflows/deploy_docs.yml
@@ -0,0 +1,16 @@
+name: deploy docs
+
+on:
+  schedule:
+    - cron: '0 2 * * *'
+  workflow_dispatch:
+
+env:
+  NETLIFY_DOCS_PRODUCTION_DEPLOY_HOOK: ${{ secrets.NETLIFY_DOCS_PRODUCTION_DEPLOY_HOOK }}
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Trigger deploy hook
+        run: curl ${{ env.NETLIFY_DOCS_PRODUCTION_DEPLOY_HOOK }} -X POST
diff --git a/docs/examples/chess_production/chess.py b/docs/examples/chess_production/chess.py
index f7c5849e57..2e85805781 100644
--- a/docs/examples/chess_production/chess.py
+++ b/docs/examples/chess_production/chess.py
@@ -6,7 +6,6 @@
 from dlt.common.typing import StrAny, TDataItems
 from dlt.sources.helpers.requests import client
 
-
 @dlt.source
 def chess(
     chess_url: str = dlt.config.value,
@@ -60,7 +59,6 @@ def players_games(username: Any) -> Iterator[TDataItems]:
 
 MAX_PLAYERS = 5
 
-
 def load_data_with_retry(pipeline, data):
     try:
         for attempt in Retrying(
@@ -70,7 +68,9 @@
             reraise=True,
         ):
            with attempt:
-                logger.info(f"Running the pipeline, attempt={attempt.retry_state.attempt_number}")
+                logger.info(
+                    f"Running the pipeline, attempt={attempt.retry_state.attempt_number}"
+                )
                 load_info = pipeline.run(data)
                 logger.info(str(load_info))
@@ -92,7 +92,9 @@ def load_data_with_retry(pipeline, data):
         # print the information on the first load package and all jobs inside
         logger.info(f"First load package info: {load_info.load_packages[0]}")
         # print the information on the first completed job in first load package
-        logger.info(f"First completed job info: {load_info.load_packages[0].jobs['completed_jobs'][0]}")
+        logger.info(
+            f"First completed job info: {load_info.load_packages[0].jobs['completed_jobs'][0]}"
+        )
 
         # check for schema updates:
         schema_updates = [p.schema_update for p in load_info.load_packages]
@@ -150,4 +152,4 @@ def load_data_with_retry(pipeline, data):
     )
     # get data for a few famous players
     data = chess(chess_url="https://api.chess.com/pub/", max_players=MAX_PLAYERS)
-    load_data_with_retry(pipeline, data)
+    load_data_with_retry(pipeline, data)
\ No newline at end of file
diff --git a/docs/examples/connector_x_arrow/load_arrow.py b/docs/examples/connector_x_arrow/load_arrow.py
index 307e657514..06ca4e17b3 100644
--- a/docs/examples/connector_x_arrow/load_arrow.py
+++ b/docs/examples/connector_x_arrow/load_arrow.py
@@ -3,7 +3,6 @@
 import dlt
 from dlt.sources.credentials import ConnectionStringCredentials
 
-
 def read_sql_x(
     conn_str: ConnectionStringCredentials = dlt.secrets.value,
     query: str = dlt.config.value,
@@ -15,17 +14,16 @@ def read_sql_x(
         protocol="binary",
     )
 
-
 def genome_resource():
     # create genome resource with merge on `upid` primary key
     genome = dlt.resource(
-        name="acanthochromis_polyacanthus",
+        name="genome",
         write_disposition="merge",
-        primary_key="analysis_id",
+        primary_key="upid",
         standalone=True,
     )(read_sql_x)(
-        "mysql://anonymous@ensembldb.ensembl.org:3306/acanthochromis_polyacanthus_core_100_1",  # type: ignore[arg-type]
-        "SELECT * FROM analysis LIMIT 20",
+        "mysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam",  # type: ignore[arg-type]
+        "SELECT * FROM genome ORDER BY created LIMIT 1000",
     )
     # add incremental on created at
     genome.apply_hints(incremental=dlt.sources.incremental("created"))
diff --git a/docs/examples/google_sheets/google_sheets.py b/docs/examples/google_sheets/google_sheets.py
index 1ba330e4ca..8a93df9970 100644
--- a/docs/examples/google_sheets/google_sheets.py
+++ b/docs/examples/google_sheets/google_sheets.py
@@ -9,7 +9,6 @@
 )
 from dlt.common.typing import DictStrAny, StrAny
 
-
 def _initialize_sheets(
     credentials: Union[GcpOAuthCredentials, GcpServiceAccountCredentials]
 ) -> Any:
@@ -17,7 +16,6 @@ def _initialize_sheets(
     service = build("sheets", "v4", credentials=credentials.to_native_credentials())
     return service
 
-
 @dlt.source
 def google_spreadsheet(
     spreadsheet_id: str,
@@ -57,7 +55,6 @@ def get_sheet(sheet_name: str) -> Iterator[DictStrAny]:
         for name in sheet_names
     ]
 
-
 if __name__ == "__main__":
     pipeline = dlt.pipeline(destination="duckdb")
     # see example.secrets.toml to where to put credentials
@@ -70,4 +67,4 @@ def get_sheet(sheet_name: str) -> Iterator[DictStrAny]:
             sheet_names=range_names,
         )
     )
-    print(info)
+    print(info)
\ No newline at end of file
diff --git a/docs/examples/incremental_loading/zendesk.py b/docs/examples/incremental_loading/zendesk.py
index 6113f98793..4b8597886a 100644
--- a/docs/examples/incremental_loading/zendesk.py
+++ b/docs/examples/incremental_loading/zendesk.py
@@ -6,11 +6,12 @@
 from dlt.common.typing import TAnyDateTime
 from dlt.sources.helpers.requests import client
 
-
 @dlt.source(max_table_nesting=2)
 def zendesk_support(
     credentials: Dict[str, str] = dlt.secrets.value,
-    start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1),  # noqa: B008
+    start_date: Optional[TAnyDateTime] = pendulum.datetime(  # noqa: B008
+        year=2000, month=1, day=1
+    ),
     end_date: Optional[TAnyDateTime] = None,
 ):
     """
@@ -112,7 +113,6 @@ def get_pages(
         if not response_json["end_of_stream"]:
             get_url = response_json["next_page"]
 
-
 if __name__ == "__main__":
     # create dlt pipeline
     pipeline = dlt.pipeline(
@@ -120,4 +120,4 @@ def get_pages(
     )
 
     load_info = pipeline.run(zendesk_support())
-    print(load_info)
+    print(load_info)
\ No newline at end of file
diff --git a/docs/examples/nested_data/nested_data.py b/docs/examples/nested_data/nested_data.py
index 7f85f0522e..3464448de6 100644
--- a/docs/examples/nested_data/nested_data.py
+++ b/docs/examples/nested_data/nested_data.py
@@ -13,7 +13,6 @@
 
 CHUNK_SIZE = 10000
 
-
 # You can limit how deep dlt goes when generating child tables.
 # By default, the library will descend and generate child tables
 # for all nested lists, without a limit.
@@ -82,7 +81,6 @@ def load_documents(self) -> Iterator[TDataItem]:
         while docs_slice := list(islice(cursor, CHUNK_SIZE)):
             yield map_nested_in_place(convert_mongo_objs, docs_slice)
 
-
 def convert_mongo_objs(value: Any) -> Any:
     if isinstance(value, (ObjectId, Decimal128)):
         return str(value)
diff --git a/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py b/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py
index e7f57853ed..8f7833e7d7 100644
--- a/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py
+++ b/docs/examples/pdf_to_weaviate/pdf_to_weaviate.py
@@ -4,7 +4,6 @@
 from dlt.destinations.impl.weaviate import weaviate_adapter
 from PyPDF2 import PdfReader
 
-
 @dlt.resource(selected=False)
 def list_files(folder_path: str):
     folder_path = os.path.abspath(folder_path)
@@ -16,7 +15,6 @@ def list_files(folder_path: str):
             "mtime": os.path.getmtime(file_path),
         }
 
-
 @dlt.transformer(primary_key="page_id", write_disposition="merge")
 def pdf_to_text(file_item, separate_pages: bool = False):
     if not separate_pages:
@@ -30,7 +28,6 @@ def pdf_to_text(file_item, separate_pages: bool = False):
         page_item["page_id"] = file_item["file_name"] + "_" + str(page_no)
         yield page_item
 
-
 pipeline = dlt.pipeline(pipeline_name="pdf_to_text", destination="weaviate")
 
 # this constructs a simple pipeline that: (1) reads files from "invoices" folder (2) filters only those ending with ".pdf"
@@ -54,4 +51,4 @@ def pdf_to_text(file_item, separate_pages: bool = False):
 client = weaviate.Client("http://localhost:8080")
 
 # get text of all the invoices in InvoiceText class we just created above
-print(client.query.get("InvoiceText", ["text", "file_name", "mtime", "page_id"]).do())
+print(client.query.get("InvoiceText", ["text", "file_name", "mtime", "page_id"]).do())
\ No newline at end of file
diff --git a/docs/examples/qdrant_zendesk/qdrant.py b/docs/examples/qdrant_zendesk/qdrant.py
index bd0cbafc99..300d8dc6ad 100644
--- a/docs/examples/qdrant_zendesk/qdrant.py
+++ b/docs/examples/qdrant_zendesk/qdrant.py
@@ -10,12 +10,13 @@
 
 from dlt.common.configuration.inject import with_config
 
-
 # function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk
 @dlt.source(max_table_nesting=2)
 def zendesk_support(
     credentials: Dict[str, str] = dlt.secrets.value,
-    start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1),  # noqa: B008
+    start_date: Optional[TAnyDateTime] = pendulum.datetime(  # noqa: B008
+        year=2000, month=1, day=1
+    ),
     end_date: Optional[TAnyDateTime] = None,
 ):
     """
@@ -79,7 +80,6 @@ def _parse_date_or_none(value: Optional[str]) -> Optional[pendulum.DateTime]:
         return None
     return ensure_pendulum_datetime(value)
 
-
 # modify dates to return datetime objects instead
 def _fix_date(ticket):
     ticket["updated_at"] = _parse_date_or_none(ticket["updated_at"])
@@ -87,7 +87,6 @@ def _fix_date(ticket):
     ticket["due_at"] = _parse_date_or_none(ticket["due_at"])
     return ticket
 
-
 # function from: https://github.com/dlt-hub/verified-sources/tree/master/sources/zendesk
 def get_pages(
     url: str,
@@ -128,7 +127,6 @@ def get_pages(
         if not response_json["end_of_stream"]:
             get_url = response_json["next_page"]
 
-
 if __name__ == "__main__":
     # create a pipeline with an appropriate name
     pipeline = dlt.pipeline(
@@ -148,6 +146,7 @@ def get_pages(
 
     print(load_info)
 
+
 # running the Qdrant client to connect to your Qdrant database
 @with_config(sections=("destination", "qdrant", "credentials"))
diff --git a/docs/examples/transformers/pokemon.py b/docs/examples/transformers/pokemon.py
index 97b9a98b11..c17beff6a8 100644
--- a/docs/examples/transformers/pokemon.py
+++ b/docs/examples/transformers/pokemon.py
@@ -1,7 +1,6 @@
 import dlt
 from dlt.sources.helpers import requests
 
-
 @dlt.source(max_table_nesting=2)
 def source(pokemon_api_url: str):
     """"""
@@ -47,7 +46,6 @@ def species(pokemon_details):
 
     return (pokemon_list | pokemon, pokemon_list | pokemon | species)
 
-
 if __name__ == "__main__":
     # build duck db pipeline
     pipeline = dlt.pipeline(
@@ -56,4 +54,4 @@ def species(pokemon_details):
     )
     # the pokemon_list resource does not need to be loaded
     load_info = pipeline.run(source("https://pokeapi.co/api/v2/pokemon"))
-    print(load_info)
+    print(load_info)
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/athena.md b/docs/website/docs/dlt-ecosystem/destinations/athena.md
index a2f57538ba..9fc5dc15f9 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/athena.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/athena.md
@@ -160,4 +160,14 @@ aws_data_catalog="awsdatacatalog"
 You can choose the following file formats:
 * [parquet](../file-formats/parquet.md) is used by default
 
-------
+
+## Additional Setup guides
+
+- [Load data from Chess.com to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-athena)
+- [Load data from Notion to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-athena)
+- [Load data from HubSpot to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-athena)
+- [Load data from GitHub to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-athena)
+- [Load data from Google Analytics to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-athena)
+- [Load data from Google Sheets to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-athena)
+- [Load data from Stripe to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-athena)
+
diff --git a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md
index f44aab20b7..30c39e2ecf 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/bigquery.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/bigquery.md
@@ -173,3 +173,15 @@ This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-b
 ### Syncing of `dlt` state
 
 This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination)
+
+
+## Additional Setup guides
+
+- [Load data from Notion to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-bigquery)
+- [Load data from Google Analytics to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-bigquery)
+- [Load data from Chess.com to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-bigquery)
+- [Load data from HubSpot to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-bigquery)
+- [Load data from GitHub to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-bigquery)
+- [Load data from Google Sheets to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-bigquery)
+- [Load data from Stripe to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-bigquery)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/databricks.md b/docs/website/docs/dlt-ecosystem/destinations/databricks.md
index ae1435b482..fc100e41e2 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/databricks.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/databricks.md
@@ -184,3 +184,15 @@ This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-d
 ### Syncing of `dlt` state
 
 This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination).
+
+
+## Additional Setup guides
+
+- [Load data from GitHub to Databricks in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-databricks)
+- [Load data from Notion to Databricks in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-databricks)
+- [Load data from Stripe to Databricks in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-databricks)
+- [Load data from HubSpot to Databricks in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-databricks)
+- [Load data from Google Analytics to Databricks in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-databricks)
+- [Load data from Google Sheets to Databricks in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-databricks)
+- [Load data from Chess.com to Databricks in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-databricks)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md
index 2849a3e427..db7428dcc9 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/duckdb.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/duckdb.md
@@ -113,3 +113,15 @@ This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-d
 ### Syncing of `dlt` state
 
 This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination)
+
+
+## Additional Setup guides
+
+- [Load data from Google Analytics to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-duckdb)
+- [Load data from Google Sheets to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-duckdb)
+- [Load data from Stripe to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-duckdb)
+- [Load data from Notion to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-duckdb)
+- [Load data from Chess.com to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-duckdb)
+- [Load data from HubSpot to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-duckdb)
+- [Load data from GitHub to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-duckdb)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
index 7b32132361..decb234785 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
@@ -234,3 +234,6 @@ You can choose the following file formats:
 ## Syncing of `dlt` state
 
 This destination does not support restoring the `dlt` state. You can change that by requesting the [feature](https://github.com/dlt-hub/dlt/issues/new/choose) or contributing to the core library 😄 You can however easily [backup and restore the pipeline working folder](https://gist.github.com/rudolfix/ee6e16d8671f26ac4b9ffc915ad24b6e) - reusing the bucket and credentials used to store files.
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/motherduck.md b/docs/website/docs/dlt-ecosystem/destinations/motherduck.md
index 7a219150ec..b002286bcf 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/motherduck.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/motherduck.md
@@ -99,3 +99,6 @@ My observation is that if you write a lot of data into the database then close t
 ### Invalid Input Error: Initialization function "motherduck_init" from file
 
 Use `duckdb 0.8.1` or above.
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/mssql.md b/docs/website/docs/dlt-ecosystem/destinations/mssql.md
index e98f8bf256..9d216a52a3 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/mssql.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/mssql.md
@@ -110,3 +110,14 @@ destination.mssql.credentials="mssql://loader:@loader.database.windows
 ### dbt support
 
 No dbt support yet
+
+## Additional Setup guides
+
+- [Load data from Stripe to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-mssql)
+- [Load data from Google Analytics to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-mssql)
+- [Load data from Google Sheets to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-mssql)
+- [Load data from Chess.com to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-mssql)
+- [Load data from GitHub to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-mssql)
+- [Load data from Notion to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-mssql)
+- [Load data from HubSpot to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-mssql)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/postgres.md b/docs/website/docs/dlt-ecosystem/destinations/postgres.md
index 5b63e0fd17..cd0ea08929 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/postgres.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/postgres.md
@@ -96,3 +96,15 @@ This destination [integrates with dbt](../transformations/dbt/dbt.md) via dbt-po
 ### Syncing of `dlt` state
 
 This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination)
+
+
+## Additional Setup guides
+
+- [Load data from HubSpot to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-postgres)
+- [Load data from GitHub to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-postgres)
+- [Load data from Chess.com to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-postgres)
+- [Load data from Notion to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-postgres)
+- [Load data from Google Analytics to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-postgres)
+- [Load data from Google Sheets to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-postgres)
+- [Load data from Stripe to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-postgres)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/qdrant.md b/docs/website/docs/dlt-ecosystem/destinations/qdrant.md
index b47745cc1a..04b5cac19b 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/qdrant.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/qdrant.md
@@ -220,3 +220,6 @@ You can find the setup instructions to run Qdrant [here](https://qdrant.tech/doc
 ### Syncing of `dlt` state
 
 Qdrant destination supports syncing of the `dlt` state.
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/redshift.md b/docs/website/docs/dlt-ecosystem/destinations/redshift.md
index 7512be101d..cb220a31fc 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/redshift.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/redshift.md
@@ -132,3 +132,15 @@ pipeline = dlt.pipeline(
 ## Supported loader file formats
 
 Supported loader file formats for Redshift are `sql` and `insert_values` (default). When using a staging location, Redshift supports `parquet` and `jsonl`.
+
+
+## Additional Setup guides
+
+- [Load data from Google Analytics to Redshift in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-redshift)
+- [Load data from Notion to Redshift in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-redshift)
+- [Load data from Chess.com to Redshift in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-redshift)
+- [Load data from HubSpot to Redshift in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-redshift)
+- [Load data from GitHub to Redshift in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-redshift)
+- [Load data from Stripe to Redshift in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-redshift)
+- [Load data from Google Sheets to Redshift in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-redshift)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md
index 713e706434..34efb0df39 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/snowflake.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/snowflake.md
@@ -255,3 +255,15 @@ This destination [integrates with dbt](../transformations/dbt/dbt.md) via [dbt-s
 ### Syncing of `dlt` state
 
 This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination)
+
+
+## Additional Setup guides
+
+- [Load data from Stripe to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-snowflake)
+- [Load data from GitHub to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-snowflake)
+- [Load data from Google Analytics to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-snowflake)
+- [Load data from Notion to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-snowflake)
+- [Load data from HubSpot to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-snowflake)
+- [Load data from Chess.com to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-snowflake)
+- [Load data from Google Sheets to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-snowflake)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/synapse.md b/docs/website/docs/dlt-ecosystem/destinations/synapse.md
index 8c1a7b29bc..6ace1ac5a8 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/synapse.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/synapse.md
@@ -202,3 +202,14 @@ Integration with [dbt](../transformations/dbt/dbt.md) is currently not supported
 ### Syncing of `dlt` state
 
 This destination fully supports [dlt state sync](../../general-usage/state#syncing-state-with-destination).
+
+## Additional Setup guides
+
+- [Load data from Notion to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-synapse)
+- [Load data from Google Analytics to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-synapse)
+- [Load data from Google Sheets to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-synapse)
+- [Load data from HubSpot to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-synapse)
+- [Load data from GitHub to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-synapse)
+- [Load data from Stripe to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-synapse)
+- [Load data from Chess.com to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-synapse)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md
index a81c920f16..2ec09e9c24 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md
@@ -301,3 +301,7 @@ Currently Weaviate destination does not support dbt.
 ### Syncing of `dlt` state
 
 Weaviate destination supports syncing of the `dlt` state.
+
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md b/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md
index 570f4c2b80..0baf1917d1 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/airtable.md
@@ -239,3 +239,6 @@ verified source.
    load_info = pipeline.run(airtables, write_disposition="replace")
    print(load_info)
    ```
+
+
+
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md b/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md
index b293cd04cc..2fb97ff320 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/amazon_kinesis.md
@@ -290,3 +290,6 @@ verified source.
    json.typed_dump(managed_state.state, f)
    print(managed_state.state)
    ```
+
+
+
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/asana.md b/docs/website/docs/dlt-ecosystem/verified-sources/asana.md
index e4d838935a..8554cdd376 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/asana.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/asana.md
@@ -268,3 +268,5 @@ these steps:
    # print the information on data that was loaded
    print(load_info)
    ```
+
+
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/chess.md b/docs/website/docs/dlt-ecosystem/verified-sources/chess.md
index 2f8da8dbdb..7f01b83f08 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/chess.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/chess.md
@@ -225,12 +225,16 @@ To create your data loading pipeline for players and load data, follow these ste
    print(info)
    ```
 
+
 ## Additional Setup guides
+
 - [Load data from Chess.com to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-athena)
 - [Load data from Chess.com to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-postgres)
+- [Load data from Chess.com to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-mssql)
 - [Load data from Chess.com to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-bigquery)
 - [Load data from Chess.com to Redshift in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-redshift)
 - [Load data from Chess.com to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-duckdb)
 - [Load data from Chess.com to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-synapse)
 - [Load data from Chess.com to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-snowflake)
 - [Load data from Chess.com to Databricks in python with dlt](https://dlthub.com/docs/pipelines/chess/load-data-with-python-from-chess-to-databricks)
+
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md b/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md
index c01845600a..dea97921b4 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/facebook_ads.md
@@ -443,3 +443,6 @@ verified source.
 > runs, only new reports are loaded, with the past `attribution_window_days_lag` days (default is 7)
 > being refreshed to accommodate any changes. You can adjust `time_increment_days` to change report
 > frequency (default set to one).
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md
index c9c9e54985..1421977ae8 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/filesystem.md
@@ -468,3 +468,5 @@ verified source.
 
    fs_client.ls("ci-test-bucket/standard_source/samples")
    ```
+
+
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/github.md b/docs/website/docs/dlt-ecosystem/verified-sources/github.md
index c5a5e2f3cc..2fd0277500 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/github.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/github.md
@@ -286,12 +286,16 @@ verified source.
 
    It is optional to use `access_token` or make anonymous API calls.
 
+
 ## Additional Setup guides
+
 - [Load data from GitHub to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-postgres)
 - [Load data from GitHub to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-athena)
 - [Load data from GitHub to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-snowflake)
 - [Load data from GitHub to Databricks in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-databricks)
 - [Load data from GitHub to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-bigquery)
+- [Load data from GitHub to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-mssql)
 - [Load data from GitHub to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-synapse)
 - [Load data from GitHub to Redshift in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-redshift)
 - [Load data from GitHub to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/github/load-data-with-python-from-github-to-duckdb)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md b/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md
index f5993655ee..02d7803a9b 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/google_analytics.md
@@ -337,8 +337,11 @@ verified source.
    > Loads data starting from the specified date during the first run, and then
    > [incrementally](https://dlthub.com/docs/general-usage/incremental-loading) in subsequent runs.
 
+
 ## Additional Setup guides
+
 - [Load data from Google Analytics to Redshift in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-redshift)
+- [Load data from Google Analytics to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-mssql)
 - [Load data from Google Analytics to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-bigquery)
 - [Load data from Google Analytics to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-duckdb)
 - [Load data from Google Analytics to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-snowflake)
@@ -346,3 +349,4 @@ verified source.
 - [Load data from Google Analytics to Databricks in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-databricks)
 - [Load data from Google Analytics to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-postgres)
 - [Load data from Google Analytics to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/google_analytics/load-data-with-python-from-google_analytics-to-athena)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md
index 5bb1f7b977..830ba909ee 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/google_sheets.md
@@ -572,7 +572,10 @@ def get_named_ranges():
 
 Enjoy the DLT Google Sheets pipeline experience!
 
+
 ## Additional Setup guides
+
+- [Load data from Google Sheets to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-mssql)
 - [Load data from Google Sheets to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-synapse)
 - [Load data from Google Sheets to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-duckdb)
 - [Load data from Google Sheets to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-postgres)
@@ -581,3 +584,4 @@ Enjoy the DLT Google Sheets pipeline experience!
 - [Load data from Google Sheets to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-athena)
 - [Load data from Google Sheets to Redshift in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-redshift)
 - [Load data from Google Sheets to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/google_sheets/load-data-with-python-from-google_sheets-to-snowflake)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md b/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md
index 202d2303ec..3a623c7b49 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md
@@ -285,7 +285,9 @@ verified source.
    the previous pipeline run. Refer to our official documentation for more information on
    [incremental loading](../../general-usage/incremental-loading.md).
 
+
 ## Additional Setup guides
+
 - [Load data from HubSpot to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-postgres)
 - [Load data from HubSpot to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-athena)
 - [Load data from HubSpot to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-bigquery)
@@ -294,3 +296,5 @@ verified source.
 - [Load data from HubSpot to Redshift in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-redshift)
 - [Load data from HubSpot to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-duckdb)
 - [Load data from HubSpot to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-snowflake)
+- [Load data from HubSpot to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/hubspot/load-data-with-python-from-hubspot-to-mssql)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md b/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md
index 4d681ab675..2aa1d1130f 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/inbox.md
@@ -282,4 +282,7 @@ verified source.
    load_info = pipeline.run((attachments | pdf_to_text).with_name("my_pages"))
    # Display loaded data details.
    print(load_info)
-   ```
\ No newline at end of file
+   ```
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/jira.md b/docs/website/docs/dlt-ecosystem/verified-sources/jira.md
index ae826d1754..4588f4f4c6 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/jira.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/jira.md
@@ -216,3 +216,6 @@ above.
    # Print Load information
    print(f"Load Information: {load_info}")
    ```
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md b/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md
index 1d09907d0a..694a81ba1f 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/kafka.md
@@ -191,3 +191,6 @@ this offset.
    data = kafka_consumer("topic", start_from=pendulum.datetime(2023, 12, 15))
    pipeline.run(data)
    ```
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md b/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md
index 5063c19cdb..45841850c6 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/matomo.md
@@ -300,3 +300,5 @@ verified source.
 
    print(load_info)
    ```
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md b/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md
index b2841e2e43..9178d2ab6d 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/mongodb.md
@@ -328,3 +328,7 @@ verified source.
    info = pipeline.run(source, write_disposition="replace")
    print(info)
    ```
+
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/mux.md b/docs/website/docs/dlt-ecosystem/verified-sources/mux.md
index 4c6e878176..a713121f29 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/mux.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/mux.md
@@ -194,3 +194,5 @@ verified source.
 
    print(load_info)
    ```
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/notion.md b/docs/website/docs/dlt-ecosystem/verified-sources/notion.md
index 36f809d902..ffb0becfbb 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/notion.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/notion.md
@@ -187,3 +187,17 @@ uniquely identifies a specific page or database.
 
 The database name ("use_name") is optional; if skipped, the pipeline will fetch it from
 Notion automatically.
+
+
+## Additional Setup guides
+
+- [Load data from Notion to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-bigquery)
+- [Load data from Notion to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-athena)
+- [Load data from Notion to Redshift in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-redshift)
+- [Load data from Notion to Azure Synapse in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-synapse)
+- [Load data from Notion to PostgreSQL in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-postgres)
+- [Load data from Notion to Databricks in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-databricks)
+- [Load data from Notion to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-duckdb)
+- [Load data from Notion to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-snowflake)
+- [Load data from Notion to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/notion/load-data-with-python-from-notion-to-mssql)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/personio.md b/docs/website/docs/dlt-ecosystem/verified-sources/personio.md
index e8cdf0fee4..6fae36d0ec 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/personio.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/personio.md
@@ -253,3 +253,6 @@ verified source.
    load_data = personio_source()
    print(pipeline.run(load_data))
    ```
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md b/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md
index 7e26999f3c..9d1a5a0a02 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/pipedrive.md
@@ -291,3 +291,6 @@ verified source.
    load_info = pipeline.run(activities_source)
    print(load_info)
    ```
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md b/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md
index bb09e53cc5..3051c740e1 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/salesforce.md
@@ -274,3 +274,6 @@ steps:
 > overwriting existing data. Conversely, the "task" endpoint supports "merge" mode for
 > incremental loads, updating or adding data based on the 'last_timestamp' value without erasing
 > previously loaded data.
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md b/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md
index 42cd795568..09dc392c87 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/shopify.md
@@ -338,3 +338,6 @@ verified source.
    load_info = pipeline.run(resource)
    print(load_info)
    ```
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/slack.md b/docs/website/docs/dlt-ecosystem/verified-sources/slack.md
index 9de3ebe211..85fd3f2a3a 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/slack.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/slack.md
@@ -289,3 +289,6 @@ verified source.
    load_info = pipeline.run(source.with_resources("general"))
    print(load_info)
    ```
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md
index bb23132e9b..dcd816f3a0 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/sql_database.md
@@ -376,3 +376,6 @@ To create your own pipeline, use source and resource methods from this verified
    :::
 
 1. Remember to keep the pipeline name and destination dataset name consistent. The pipeline name is crucial for retrieving the [state](https://dlthub.com/docs/general-usage/state) from the last run, which is essential for incremental loading. Altering these names could initiate a "[full_refresh](https://dlthub.com/docs/general-usage/pipeline#do-experiments-with-full-refresh)", interfering with the metadata tracking necessary for [incremental loads](https://dlthub.com/docs/general-usage/incremental-loading).
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md b/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md
index 9715885573..4ddf20aa78 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/strapi.md
@@ -176,3 +176,6 @@ verified source.
 
 > We loaded the "athletes" endpoint above, which can be customized to suit our specific
 > requirements.
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md b/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md
index cfe7d89960..0b172dc3be 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/stripe.md
@@ -267,7 +267,10 @@ verified source.
    print(load_info)
    ```
 
+
 ## Additional Setup guides
+
+- [Load data from Stripe to Microsoft SQL Server in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-mssql)
 - [Load data from Stripe to Snowflake in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-snowflake)
 - [Load data from Stripe to Databricks in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-databricks)
 - [Load data from Stripe to DuckDB in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-duckdb)
@@ -276,3 +279,4 @@ verified source.
 - [Load data from Stripe to Redshift in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-redshift)
 - [Load data from Stripe to AWS Athena in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-athena)
 - [Load data from Stripe to BigQuery in python with dlt](https://dlthub.com/docs/pipelines/stripe_analytics/load-data-with-python-from-stripe_analytics-to-bigquery)
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md
index ab561c936a..8701db7db8 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/workable.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/workable.md
@@ -271,3 +271,5 @@ To create your data pipeline using
 single loading and
 disrupting metadata tracking for
 [incremental data loading](https://dlthub.com/docs/general-usage/incremental-loading).
+
+
\ No newline at end of file
diff --git a/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md b/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md
index 58153be31b..234483dca0 100644
--- a/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md
+++ b/docs/website/docs/dlt-ecosystem/verified-sources/zendesk.md
@@ -373,3 +373,6 @@ verified source.
 > This can be useful to reduce the potential failure window when loading large amounts of historic
 > data. This approach can be used with all incremental Zendesk sources.
 
+
+
+
\ No newline at end of file
diff --git a/docs/website/docs/examples/connector_x_arrow/index.md b/docs/website/docs/examples/connector_x_arrow/index.md
index 92941e1988..6702b8bbef 100644
--- a/docs/website/docs/examples/connector_x_arrow/index.md
+++ b/docs/website/docs/examples/connector_x_arrow/index.md
@@ -56,13 +56,13 @@ def read_sql_x(
 def genome_resource():
     # create genome resource with merge on `upid` primary key
     genome = dlt.resource(
-        name="acanthochromis_polyacanthus",
+        name="genome",
         write_disposition="merge",
-        primary_key="analysis_id",
+        primary_key="upid",
         standalone=True,
     )(read_sql_x)(
-        "mysql://anonymous@ensembldb.ensembl.org:3306/acanthochromis_polyacanthus_core_100_1",  # type: ignore[arg-type]
-        "SELECT * FROM analysis LIMIT 20",
+        "mysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam",  # type: ignore[arg-type]
+        "SELECT * FROM genome ORDER BY created LIMIT 1000",
     )
     # add incremental on created at
     genome.apply_hints(incremental=dlt.sources.incremental("created"))
diff --git a/docs/website/docs/intro.md b/docs/website/docs/intro.md
index 37f03544d0..04af626566 100644
--- a/docs/website/docs/intro.md
+++ b/docs/website/docs/intro.md
@@ -143,25 +143,21 @@ from sqlalchemy import create_engine
 
 # MySQL instance to get data.
 # NOTE: you'll need to install pymysql with `pip install pymysql`
 # NOTE: loading data from public mysql instance may take several seconds
-engine = create_engine(
-    "mysql+pymysql://anonymous@ensembldb.ensembl.org:3306/acanthochromis_polyacanthus_core_100_1"
-)
+engine = create_engine("mysql+pymysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam")
 
 with engine.connect() as conn:
     # Select genome table, stream data in batches of 100 elements
-    query = "SELECT * FROM analysis LIMIT 1000"
+    query = "SELECT * FROM genome LIMIT 1000"
     rows = conn.execution_options(yield_per=100).exec_driver_sql(query)
 
     pipeline = dlt.pipeline(
         pipeline_name="from_database",
         destination="duckdb",
-        dataset_name="acanthochromis_polyacanthus_data",
+        dataset_name="genome_data",
     )
 
     # Convert the rows into dictionaries on the fly with a map function
-    load_info = pipeline.run(
-        map(lambda row: dict(row._mapping), rows), table_name="acanthochromis_polyacanthus"
-    )
+    load_info = pipeline.run(map(lambda row: dict(row._mapping), rows), table_name="genome")
 
     print(load_info)
 ```
diff --git a/docs/website/package-lock.json b/docs/website/package-lock.json
index c45374f83b..c62c496477 100644
--- a/docs/website/package-lock.json
+++ b/docs/website/package-lock.json
@@ -21,6 +21,7 @@
         "react-dom": "^17.0.2",
         "react-twitter-embed": "^4.0.4",
         "string-dedent": "^3.0.1",
+        "sync-fetch": "^0.5.2",
         "toml": "^3.0.0"
       },
       "devDependencies": {
@@ -11359,6 +11360,17 @@
         "url": "https://github.com/fb55/entities?sponsor=1"
       }
     },
+    "node_modules/sync-fetch": {
+      "version": "0.5.2",
+      "resolved": "https://registry.npmjs.org/sync-fetch/-/sync-fetch-0.5.2.tgz",
+      "integrity": "sha512-6gBqqkHrYvkH65WI2bzrDwrIKmt3U10s4Exnz3dYuE5Ah62FIfNv/F63inrNhu2Nyh3GH5f42GKU3RrSJoaUyQ==",
+      "dependencies": {
+        "node-fetch": "^2.6.1"
+      },
+      "engines": {
+        "node": ">=14"
+      }
+    },
     "node_modules/tapable": {
       "version": "2.2.1",
       "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz",
diff --git a/docs/website/package.json b/docs/website/package.json
index 5e824dbb35..623f04efba 100644
--- a/docs/website/package.json
+++ b/docs/website/package.json
@@ -21,6 +21,7 @@
     "@mdx-js/react": "^1.6.22",
     "clsx": "^1.2.1",
     "dedent": "^1.5.1",
+    "dotenv": "^16.3.1",
     "node-watch": "^0.7.4",
     "prism-react-renderer": "^1.3.5",
     "raw-loader": "^4.0.2",
@@ -28,7 +29,7 @@
     "react-dom": "^17.0.2",
     "react-twitter-embed": "^4.0.4",
     "string-dedent": "^3.0.1",
-    "dotenv": "^16.3.1",
+    "sync-fetch": "^0.5.2",
     "toml": "^3.0.0"
   },
   "devDependencies": {
diff --git a/docs/website/tools/update_snippets.js b/docs/website/tools/update_snippets.js
index 1a085a02d5..9e4d180d88 100644
--- a/docs/website/tools/update_snippets.js
+++ b/docs/website/tools/update_snippets.js
@@ -2,6 +2,7 @@ const fs = require('fs');
 const path = require('path');
 const dedent = require('dedent');
 var watch = require('node-watch');
+const fetch = require('sync-fetch')
 
 // docs snippets settings
 const BASE_DIR = "./docs/";
@@ -19,6 +20,13 @@ const DLT_MARKER = "@@@DLT";
 const START_MARKER = DLT_MARKER + "_SNIPPET_START";
 const END_MARKER = DLT_MARKER + "_SNIPPET_END";
 
+const tubaConfig = fetch('https://dlthub.com/docs/pipelines/links.json', {
+    headers: {
+        Accept: 'application/vnd.citationstyles.csl+json'
+    }
+}).json()
+
+
 // yield all files in dir
 function *walkSync(dir) {
     const files = fs.readdirSync(dir, { withFileTypes: true });
@@ -99,8 +107,34 @@ function getSnippetFromFile(snippetsFileName, snippetName) {
 
 // get the right snippet for a file
 function getSnippet(fileName, snippetName) {
+
+    // regular snippet
     const ext = path.extname(fileName);
-    const snippetParts = snippetName.split("::");
+    const snippetParts = snippetName.split("::");
+
+    // tuba snippet
+    if (snippetName.startsWith("tuba::")) {
+        const tag = snippetParts[1];
+        const links = []
+        for (const link of tubaConfig) {
+            if (link.tags.includes(tag)) {
+                links.push(link)
+            }
+        }
+        // no matching links
+        if (links.length == 0) {
+            return []
+        }
+
+        // render links
+        const snippet = ["## Additional Setup guides", ""]
+        for (const link of links) {
+            snippet.push(`- [${link.title}](${link.public_url})`)
+        }
+        return snippet
+    }
+
+    // regular snippet
     let snippetsFileName = fileName.slice(0, -ext.length) + SNIPPETS_FILE_SUFFIX;
     if (snippetParts.length > 1) {
         snippetsFileName = path.dirname(fileName) + "/" + snippetParts[0];