Add custom parent-child relationships example

dlt-hub · Aug 10, 2024 · 0a9e0eb · 0a9e0eb
1 parent 683a096
commit 0a9e0eb
Showing 1 changed file with 27 additions and 20 deletions.
diff --git a/docs/examples/parent_child_relationship/parent_child_relationship.py b/docs/examples/parent_child_relationship/parent_child_relationship.py
@@ -1,59 +1,67 @@
 """
 ---
-title: Load parent table keys into child table
+title: Load parent table records into child table
 description: Learn how to integrate custom parent keys into child records
 keywords: [parent child relationship, parent key]
 ---
 
-This example demonstrates handling data with parent-child relationships using the `dlt` library. This process is about integrating specific fields (e.g. primary, foreign keys) from a parent record into each child record, ensuring that we can use self defined parent keys in the child table. 
+This example demonstrates handling data with parent-child relationships using the `dlt` library. This process 
+is about integrating specific fields (e.g. primary, foreign keys) from a parent record into each child record, 
+ensuring that we can use self-defined parent keys in the child table.
 
 In this example, we'll explore how to:
 
 - Integrate a custom `parent_id` into each child record.
 - Ensure every child is correctly linked to its `parent_id` using a tailored function, `add_parent_id`.
-- Use the [`add_map` function](https://dlthub.com/docs/api_reference/extract/resource#add_map) to apply this custom logic to every record in our dataset.
+- Use the [`add_map` function](https://dlthub.com/docs/api_reference/extract/resource#add_map) to apply this 
+custom logic to every record in our dataset.
+
+:::note important
+Please note that dlt metadata, including `_dlt_id` and `_dlt_load_id`, will still be loaded into the tables.
+:::
 """
 
 from typing import Dict, Any
 import dlt
 
+
 # Define a dlt resource with the write disposition set to 'merge'
-@dlt.resource(name='parent_with_children', write_disposition={"disposition": "merge"})
+@dlt.resource(name="parent_with_children", write_disposition={"disposition": "merge"})
 def data_source() -> Dict[str, Any]:
     # Example data
     data = [
         {
-            'parent_id': 1,
-            'parent_name': 'Alice',
-            'children': [
-                {'child_id': 1, 'child_name': 'Child 1'},
-                {'child_id': 2, 'child_name': 'Child 2'}
-            ]
+            "parent_id": 1,
+            "parent_name": "Alice",
+            "children": [
+                {"child_id": 1, "child_name": "Child 1"},
+                {"child_id": 2, "child_name": "Child 2"},
+            ],
         },
         {
-            'parent_id': 2,
-            'parent_name': 'Bob',
-            'children': [
-                {'child_id': 3, 'child_name': 'Child 3'}
-            ]
-        }
+            "parent_id": 2,
+            "parent_name": "Bob",
+            "children": [{"child_id": 3, "child_name": "Child 3"}],
+        },
     ]
 
     yield data
 
+
 # Function to add parent_id to each child record within a parent record
 def add_parent_id(record: Dict[str, Any]) -> Dict[str, Any]:
     parent_id_key = "parent_id"
     for child in record["children"]:
         child[parent_id_key] = record[parent_id_key]
     return record
 
+
 if __name__ == "__main__":
     # Create and configure the dlt pipeline
     pipeline = dlt.pipeline(
-        pipeline_name='generic_pipeline',
-        destination='duckdb',
-        dataset_name='dataset',
+        pipeline_name="generic_pipeline",
+        destination="duckdb",
+        dataset_name="dataset",
     )
 
     # Run the pipeline
@@ -64,4 +72,3 @@ def add_parent_id(record: Dict[str, Any]) -> Dict[str, Any]:
     )
     # Output the load information after pipeline execution
     print(load_info)
-