From 0a9e0eb566fea0a2350cb68ce1aca6357e33ca3f Mon Sep 17 00:00:00 2001 From: dat-a-man <98139823+dat-a-man@users.noreply.github.com> Date: Sat, 10 Aug 2024 07:35:15 +0000 Subject: [PATCH] Add custom parent-child relationships example --- .../parent_child_relationship.py | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/docs/examples/parent_child_relationship/parent_child_relationship.py b/docs/examples/parent_child_relationship/parent_child_relationship.py index ae5bd1d95d..5c39bd9dce 100644 --- a/docs/examples/parent_child_relationship/parent_child_relationship.py +++ b/docs/examples/parent_child_relationship/parent_child_relationship.py @@ -1,46 +1,53 @@ """ --- -title: Load parent table keys into child table +title: Load parent table records into child table description: Learn how integrate custom parent keys into child records keywords: [parent child relationship, parent key] --- -This example demonstrates handling data with parent-child relationships using the `dlt` library. This process is about integrating specific fields (e.g. primary, foreign keys) from a parent record into each child record, ensuring that we can use self defined parent keys in the child table. +This example demonstrates handling data with parent-child relationships using the `dlt` library. This process +is about integrating specific fields (e.g. primary, foreign keys) from a parent record into each child record, +ensuring that we can use self-defined parent keys in the child table. In this example, we'll explore how to: - Integrate a custom `parent_id` into each child record. - Ensure every child is correctly linked to its `parent_id` using a tailored function, `add_parent_id`. -- Use the [`add_map` function](https://dlthub.com/docs/api_reference/extract/resource#add_map) to apply this custom logic to every record in our dataset. 
+- Use the [`add_map` function](https://dlthub.com/docs/api_reference/extract/resource#add_map) to apply this +custom logic to every record in our dataset. + +:::note important +Please note that dlt metadata, including `_dlt_id` and `_dlt_load_id`, will still be loaded into the tables. +::: """ from typing import Dict, Any import dlt + # Define a dlt resource with write disposition to 'merge' -@dlt.resource(name='parent_with_children', write_disposition={"disposition": "merge"}) +@dlt.resource(name="parent_with_children", write_disposition={"disposition": "merge"}) def data_source() -> Dict[str, Any]: # Example data data = [ { - 'parent_id': 1, - 'parent_name': 'Alice', - 'children': [ - {'child_id': 1, 'child_name': 'Child 1'}, - {'child_id': 2, 'child_name': 'Child 2'} - ] + "parent_id": 1, + "parent_name": "Alice", + "children": [ + {"child_id": 1, "child_name": "Child 1"}, + {"child_id": 2, "child_name": "Child 2"}, + ], }, { - 'parent_id': 2, - 'parent_name': 'Bob', - 'children': [ - {'child_id': 3, 'child_name': 'Child 3'} - ] - } + "parent_id": 2, + "parent_name": "Bob", + "children": [{"child_id": 3, "child_name": "Child 3"}], + }, ] yield data + # Function to add parent_id to each child record within a parent record def add_parent_id(record: Dict[str, Any]) -> Dict[str, Any]: parent_id_key = "parent_id" @@ -48,12 +55,13 @@ def add_parent_id(record: Dict[str, Any]) -> Dict[str, Any]: child[parent_id_key] = record[parent_id_key] return record + if __name__ == "__main__": # Create and configure the dlt pipeline pipeline = dlt.pipeline( - pipeline_name='generic_pipeline', - destination='duckdb', - dataset_name='dataset', + pipeline_name="generic_pipeline", + destination="duckdb", + dataset_name="dataset", ) # Run the pipeline @@ -64,4 +72,3 @@ def add_parent_id(record: Dict[str, Any]) -> Dict[str, Any]: ) # Output the load information after pipeline execution print(load_info) -