Skip to content

Commit

Permalink
Add dispatch to multiple tables to the sidebar
Browse files Browse the repository at this point in the history
  • Loading branch information
burnash committed Jan 16, 2024
1 parent 56605bb commit 9422eb0
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
29 changes: 19 additions & 10 deletions docs/website/docs/walkthroughs/dispatch-to-multiple-tables.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
### Dispatch stream of events to tables by event type
---
title: Dispatch stream of events to multiple tables
description: dispatch stream of events to tables by event type
keywords: [dispatch, stream, events, tables, event type]
---

This is a fun but practical example that reads GitHub events from the **dlt** repository (such as an issue or pull request being created, a comment being added, etc.).
Each event type is sent to a different table in `duckdb`.
Expand All @@ -7,19 +11,23 @@ Each event type is sent to a different table in `duckdb`.
import dlt
from dlt.sources.helpers import requests

@dlt.resource(primary_key="id", table_name=lambda i: i["type"], write_disposition="append")
def repo_events(
last_created_at = dlt.sources.incremental("created_at")
):
@dlt.resource(
primary_key="id",
table_name=lambda i: i["type"],
write_disposition="append",
)
def repo_events(last_created_at=dlt.sources.incremental("created_at")):
url = "https://api.github.com/repos/dlt-hub/dlt/events?per_page=100"

while True:
response = requests.get(url)
response.raise_for_status()
yield response.json()

# stop requesting pages if the last element was already older than initial value
# note: incremental will skip those items anyway, we just do not want to use the api limits
# stop requesting pages if the last element was already older than
# the initial value
# note: incremental will skip those items anyway; we just do not
# want to use up the API rate limit
if last_created_at.start_out_of_range:
break

Expand All @@ -28,10 +36,11 @@ def repo_events(
break
url = response.links["next"]["url"]


pipeline = dlt.pipeline(
pipeline_name='github_events',
destination='duckdb',
dataset_name='github_events_data',
pipeline_name="github_events",
destination="duckdb",
dataset_name="github_events_data",
)
load_info = pipeline.run(repo_events)
row_counts = pipeline.last_trace.last_normalize_info
Expand Down
1 change: 1 addition & 0 deletions docs/website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ const sidebars = {
'running-in-production/tracing',
],
},
'walkthroughs/dispatch-to-multiple-tables',
'walkthroughs/create-new-destination',
'walkthroughs/zendesk-weaviate',
],
Expand Down

0 comments on commit 9422eb0

Please sign in to comment.