-
Notifications
You must be signed in to change notification settings - Fork 186
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* remove snipsync (#613) * add custom snippets element * remove snipsync * migrate performance snippets * add missing init files * refine snippet watcher * docs examples * fixes chess example * fixes DLT -> dlt * more work on transformers example * make header smaller * example for zendesk incremental loading * move incremental loading example to right location * added text and output example to incremental zendesk * allow secrets files in examples * add zendesk credentials * correct text and code snippets for zendesk example * add main clause * add config example * pytest marker to skip tests running in PRs from forks on github * removes more typings and adds comments to zendesk example * shortens example titles --------- Co-authored-by: Marcin Rudolf <[email protected]> Co-authored-by: AstrakhantsevaAA <[email protected]>
- Loading branch information
1 parent
513e73b
commit 58f8ad1
Showing
95 changed files
with
935 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# here is a file with the secrets for all the example pipelines in `examples` folder
# WARNING(review): these values look like live credentials. Secrets committed to a
# repository must be treated as compromised — rotate/revoke them and keep this file
# out of version control (e.g. via .gitignore) going forward.

[sources]
# redshift password for query tables example
query_table.credentials.password="8P5gyDPNo9zo582rQG6a"
query_sql.credentials.password="8P5gyDPNo9zo582rQG6a"

# google sheets example
[sources.google_spreadsheet.credentials]
project_id="chat-analytics-317513"
client_email="[email protected]"
private_key="-----BEGIN PRIVATE KEY-----\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD+S\n7inCg8CdRKEMZ/q7Rv5uUiTyUMjQLNXySOPRSSJBSXBPpLJPbcmfxCYgOPWadA3F\noa54WJFR3Uxd+SjAC848dGz5+JEL5u2rHcjzL1IbjDd5oH9rap/QxYm/R9Q5eSdC\nlGiiFh4zH+U9nhWWUovn+ofixbQkhrMFOfgHt+Jvdh/2m7Sdz47itjWFC258R1Ki\nH9vPVtHxw0LrcUZN7HACV3NICRUkvX8U2+JC25esmPxc/qqmxoFlQ7ono/NeMjQa\nq2TyTyNSh4pDtf30PAW4cU2AUbtaSPhIdRloDuwzW9d88VUUbVelqHHeJRgGjkSX\nQz2MCBuFAgMBAAECgf8zlepWYEgm8xtdim2ydB3mdhR/zoZmZM/Q1NthJ8u/IbdO\nb2HPEXxGbDKIIJzhAA17Un98leBLwYKuLZhOpdB+igyJlTG8XlCRF88XiUosJWR3\niHmiuMkndHA7WyTXDc0n3GpUFYWkGGng2cKLx7V/OFmpMwhC9LEKMNOrBKnf9U6Z\n/9nanIerFZU4m5mWbNW/ZRc+qvd+1zGw/JYM6ntdkKLo/TwNOmOS5FS01yLvx7Xw\nm12f9I3VceGXWyrYEh+UCWk0gsEb8xnGGZKy3op5I6trsXzH8I3HCXvapkeWSaFe\n/gmT3CHZIK9hang6f4yMG+niuNtZE2/METgvcjkCgYEAwTg1SZAYSaL6LoFV92Kq\nyHV0DP8KivDIKrByOym9oikPK/2ZMNi9NivVmSALuR54wj7pFxFmyEj6UTklSeLb\nRvOjcPnZEMbFspRHIzkySfsnfScoHZXOeakjOub1K5FehYsLXQIfe7iwRg/mcd/2\noFVyJrm2aNXcvNuug4scEE0CgYEAuuaRmGY5mKv+viuZ/zzOky7IjDnp4w2BMJt0\noMXznKuLHJpexnQ9A/ZHxpAp6Bi6Glk0XLi2uaI+ggXlEUfNa3DHMQu7xg1RaCqN\n957WGRO0ETtIWdp1BHhWPtT5kdOrjSZy9vRSZ0vh2WnZe5SgKRVCqQsV7ExcEltz\nUc9WlBkCgYA9MaQOzEgk6iz6FZQ4aVNVcX1zsEKShneerYtAGZQpi392mzatNbeX\nNILNoEyWMIRmYK5J1AUNYa+FkeexYtu3uOoGmdqZaZqrWDK/gRngPF7hUElwNUXT\nWjICMatsRPn+qW7L4iQ+dtu9FMQTRK9DUEx6305aHYFvftPibWhR8QKBgQCAd3GG\nNmXKihaMsr2kUjCPvG1+7WPVfHfbaE9PHyFnBAaXv4f7kvRJn+QQGRGlBjINYFl8\njj6S9HFQwCqGqTsKabeQ/8auyIK3PeDdXqE9FW0FFyGRGXarfueRQqTU1pCpcc89\n7gwiEmeIIJiruCoqcwGh3gvQo1/6AkAO8JxLKQKBgF0T8P0hRctXFejcFf/4EikS\n2+WA/gNSQITC1m+8nWNnU+bDmRax+pIkzlvjkG5kyNfWvB7i2A5Y5OnCo92y5aDQ\nzbGHLwZj0HXqLFXhbAv/0xZPXlZ71NWpi2BpCJRnzU65ftsjePfydfvN6g4mPQ28\nkHQsYKUZk5HPC8FlPvQe\n-----END PRIVATE KEY-----\n"

[destination]
# all postgres destinations for all examples
postgres.credentials = "postgres://loader:loader@localhost:5432/dlt_data"
# all redshift destinations for all examples
redshift.credentials = "postgres://loader:8P5gyDPNo9zo582rQG6a@chat-analytics.czwteevq7bpe.eu-central-1.redshift.amazonaws.com:5439/chat_analytics_rasa"

# all the bigquery destinations
[destination.bigquery.credentials]
project_id="chat-analytics-317513"
client_email="[email protected]"
private_key="-----BEGIN PRIVATE KEY-----\nMIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQCNEN0bL39HmD+S\n7inCg8CdRKEMZ/q7Rv5uUiTyUMjQLNXySOPRSSJBSXBPpLJPbcmfxCYgOPWadA3F\noa54WJFR3Uxd+SjAC848dGz5+JEL5u2rHcjzL1IbjDd5oH9rap/QxYm/R9Q5eSdC\nlGiiFh4zH+U9nhWWUovn+ofixbQkhrMFOfgHt+Jvdh/2m7Sdz47itjWFC258R1Ki\nH9vPVtHxw0LrcUZN7HACV3NICRUkvX8U2+JC25esmPxc/qqmxoFlQ7ono/NeMjQa\nq2TyTyNSh4pDtf30PAW4cU2AUbtaSPhIdRloDuwzW9d88VUUbVelqHHeJRgGjkSX\nQz2MCBuFAgMBAAECgf8zlepWYEgm8xtdim2ydB3mdhR/zoZmZM/Q1NthJ8u/IbdO\nb2HPEXxGbDKIIJzhAA17Un98leBLwYKuLZhOpdB+igyJlTG8XlCRF88XiUosJWR3\niHmiuMkndHA7WyTXDc0n3GpUFYWkGGng2cKLx7V/OFmpMwhC9LEKMNOrBKnf9U6Z\n/9nanIerFZU4m5mWbNW/ZRc+qvd+1zGw/JYM6ntdkKLo/TwNOmOS5FS01yLvx7Xw\nm12f9I3VceGXWyrYEh+UCWk0gsEb8xnGGZKy3op5I6trsXzH8I3HCXvapkeWSaFe\n/gmT3CHZIK9hang6f4yMG+niuNtZE2/METgvcjkCgYEAwTg1SZAYSaL6LoFV92Kq\nyHV0DP8KivDIKrByOym9oikPK/2ZMNi9NivVmSALuR54wj7pFxFmyEj6UTklSeLb\nRvOjcPnZEMbFspRHIzkySfsnfScoHZXOeakjOub1K5FehYsLXQIfe7iwRg/mcd/2\noFVyJrm2aNXcvNuug4scEE0CgYEAuuaRmGY5mKv+viuZ/zzOky7IjDnp4w2BMJt0\noMXznKuLHJpexnQ9A/ZHxpAp6Bi6Glk0XLi2uaI+ggXlEUfNa3DHMQu7xg1RaCqN\n957WGRO0ETtIWdp1BHhWPtT5kdOrjSZy9vRSZ0vh2WnZe5SgKRVCqQsV7ExcEltz\nUc9WlBkCgYA9MaQOzEgk6iz6FZQ4aVNVcX1zsEKShneerYtAGZQpi392mzatNbeX\nNILNoEyWMIRmYK5J1AUNYa+FkeexYtu3uOoGmdqZaZqrWDK/gRngPF7hUElwNUXT\nWjICMatsRPn+qW7L4iQ+dtu9FMQTRK9DUEx6305aHYFvftPibWhR8QKBgQCAd3GG\nNmXKihaMsr2kUjCPvG1+7WPVfHfbaE9PHyFnBAaXv4f7kvRJn+QQGRGlBjINYFl8\njj6S9HFQwCqGqTsKabeQ/8auyIK3PeDdXqE9FW0FFyGRGXarfueRQqTU1pCpcc89\n7gwiEmeIIJiruCoqcwGh3gvQo1/6AkAO8JxLKQKBgF0T8P0hRctXFejcFf/4EikS\n2+WA/gNSQITC1m+8nWNnU+bDmRax+pIkzlvjkG5kyNfWvB7i2A5Y5OnCo92y5aDQ\nzbGHLwZj0HXqLFXhbAv/0xZPXlZ71NWpi2BpCJRnzU65ftsjePfydfvN6g4mPQ28\nkHQsYKUZk5HPC8FlPvQe\n-----END PRIVATE KEY-----\n"
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Zendesk credentials for the incremental-loading example.
# Fill in before running; keep this file out of version control.
[sources.zendesk.credentials]
password = ""
subdomain = ""
email = ""
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
from typing import Iterator, Optional, Dict, Any, Tuple | ||
|
||
import dlt | ||
from dlt.common import pendulum | ||
from dlt.common.time import ensure_pendulum_datetime | ||
from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime | ||
from dlt.extract.source import DltResource | ||
from dlt.sources.helpers.requests import client | ||
|
||
|
||
@dlt.source(max_table_nesting=2)
def zendesk_support(
    credentials: Dict[str, str] = dlt.secrets.value,
    start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1),  # noqa: B008
    end_date: Optional[TAnyDateTime] = None,
):
    """
    Retrieves data from Zendesk Support for tickets events.

    Args:
        credentials: Zendesk credentials (default: dlt.secrets.value).
            Expects the keys "email", "password" and "subdomain".
        start_date: Start date for data extraction (default: 2000-01-01)
        end_date: End date for data extraction (default: None).
            If end time is not provided, the incremental loading will be
            enabled, and after the initial run, only new data will be retrieved.

    Returns:
        DltResource.
    """
    # Convert start_date and end_date to Pendulum datetime objects
    start_date_obj = ensure_pendulum_datetime(start_date)
    end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None

    # Convert Pendulum datetime objects to Unix timestamps
    # (the Zendesk incremental API works with epoch seconds)
    start_date_ts = start_date_obj.int_timestamp
    end_date_ts: Optional[int] = None
    if end_date_obj:
        end_date_ts = end_date_obj.int_timestamp

    # Extract credentials from secrets dictionary
    auth = (credentials["email"], credentials["password"])
    subdomain = credentials["subdomain"]
    url = f"https://{subdomain}.zendesk.com"

    # we use `append` write disposition, because objects in ticket_events endpoint are never updated
    # so we do not need to merge
    # we set primary_key so allow deduplication of events by the `incremental` below in the rare case
    # when two events have the same timestamp
    @dlt.resource(primary_key="id", write_disposition="append")
    def ticket_events(
        timestamp: dlt.sources.incremental[int] = dlt.sources.incremental(
            "timestamp",
            initial_value=start_date_ts,
            end_value=end_date_ts,
            allow_external_schedulers=True,
        ),
    ):
        # URL For ticket events
        # 'https://d3v-dlthub.zendesk.com/api/v2/incremental/ticket_events.json?start_time=946684800'
        event_pages = get_pages(
            url=url,
            endpoint="/api/v2/incremental/ticket_events.json",
            auth=auth,
            data_point_name="ticket_events",
            params={"start_time": timestamp.last_value},
        )
        for page in event_pages:
            yield page
            # stop loading when using end_value and end is reached.
            # unfortunately, Zendesk API does not have the "end_time" parameter, so we stop iterating ourselves
            if timestamp.end_out_of_range:
                return

    return ticket_events
|
||
|
||
def get_pages(
    url: str,
    endpoint: str,
    auth: Tuple[str, str],
    data_point_name: str,
    params: Optional[Dict[str, Any]] = None,
):
    """
    Makes a request to a paginated endpoint and returns a generator of data items per page.

    Args:
        url: The base URL.
        endpoint: The url to the endpoint, e.g. /api/v2/calls
        auth: Credentials for authentication.
        data_point_name: The key which data items are nested under in the response object (e.g. calls)
        params: Optional dict of query params to include in the request.

    Returns:
        Generator of pages, each page is a list of dict data items.
    """
    # copy the params so the caller's dict is not mutated, then update
    # the page size to enable cursor pagination
    params = dict(params) if params else {}
    params["per_page"] = 1000

    # make request and keep looping until there is no next page
    get_url = f"{url}{endpoint}"
    while get_url:
        response = client.get(get_url, auth=auth, params=params)
        response.raise_for_status()
        response_json = response.json()
        yield response_json[data_point_name]

        get_url = None
        # See https://developer.zendesk.com/api-reference/ticketing/ticket-management/incremental_exports/#json-format
        if not response_json["end_of_stream"]:
            get_url = response_json["next_page"]
|
||
|
||
if __name__ == "__main__":
    # Build a local DuckDB pipeline and run the Zendesk ticket-events source.
    zendesk_pipeline = dlt.pipeline(
        pipeline_name="zendesk",
        destination="duckdb",
        dataset_name="zendesk_data",
    )
    info = zendesk_pipeline.run(zendesk_support())
    print(info)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
[runtime]
log_level="WARNING"

[extract]
# use 2 workers to extract sources in parallel
# NOTE(review): was `worker=2`; the dlt performance settings use the key
# `workers` (as in [normalize] and [load] below) — confirm against dlt docs
workers=2
# allow 10 async items to be processed in parallel
max_parallel_items=10

[normalize]
# use 3 worker processes to process 3 files in parallel
workers=3

[load]
# have 50 concurrent load jobs
workers=50
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import dlt | ||
from dlt.sources.helpers import requests | ||
|
||
|
||
@dlt.source(max_table_nesting=2)
def source(pokemon_api_url: str):
    """A dlt source loading pokemon details and species from the PokeAPI.

    Demonstrates parallel extraction: items are fetched concurrently in a
    thread pool via @dlt.defer, both per-item and for a whole transformer.

    Args:
        pokemon_api_url: URL of the PokeAPI pokemon list endpoint.
    """

    # note that we deselect `pokemon_list` - we do not want it to be loaded
    @dlt.resource(write_disposition="replace", selected=False)
    def pokemon_list():
        """Retrieve a first page of Pokemons and yield it. We do not retrieve all the pages in this example"""
        yield requests.get(pokemon_api_url).json()["results"]

    # transformer that retrieves a list of objects in parallel
    @dlt.transformer
    def pokemon(pokemons):
        """Yields details for a list of `pokemons`"""

        # @dlt.defer marks a function to be executed in parallel
        # in a thread pool
        @dlt.defer
        def _get_pokemon(_pokemon):
            return requests.get(_pokemon["url"]).json()

        # call and yield the function result normally, the @dlt.defer takes care of parallelism
        for _pokemon in pokemons:
            yield _get_pokemon(_pokemon)

    # a special case where just one item is retrieved in transformer
    # a whole transformer may be marked for parallel execution
    @dlt.transformer
    @dlt.defer
    def species(pokemon_details):
        """Yields species details for a pokemon"""
        species_data = requests.get(pokemon_details["species"]["url"]).json()
        # link back to pokemon so we have a relation in loaded data
        species_data["pokemon_id"] = pokemon_details["id"]
        # just return the results, if you yield,
        # generator will be evaluated in main thread
        return species_data

    # create two simple pipelines with | operator
    # 1. send list of pokemons into `pokemon` transformer to get pokemon details
    # 2. send pokemon details into `species` transformer to get species details
    # NOTE: dlt is smart enough to get data from pokemon_list and pokemon details once
    return (
        pokemon_list | pokemon,
        pokemon_list | pokemon | species,
    )
|
||
if __name__ == "__main__":
    # Assemble a local DuckDB pipeline for the pokemon example.
    pokemon_pipeline = dlt.pipeline(
        pipeline_name="pokemon",
        destination="duckdb",
        dataset_name="pokemon_data",
    )

    # the pokemon_list resource is deselected in the source, so it is not loaded
    info = pokemon_pipeline.run(source("https://pokeapi.co/api/v2/pokemon"))
    print(info)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.