Pydantic improvements #901

Merged · 36 commits · Feb 7, 2024

Commits (changes shown are from 30 of the 36 commits)
0afb273
add example tests
sh-rp Jan 18, 2024
001aa94
Add sub-model hints for pydantic
Jan 19, 2024
89dcd20
Handle list of pydantic models field type
Jan 22, 2024
3cbd490
Add container_type field to TColumnSchema
Jan 22, 2024
3b7fd4e
Revert handling for the list of pydantic models
Jan 29, 2024
ba7814c
Use snake_case naming convention to generate field names
Jan 29, 2024
9acab62
Remove test
Jan 29, 2024
980f1f2
Add comment
Jan 29, 2024
aed23bb
Adjust logic
Jan 30, 2024
3a79838
Add more assertions
Jan 30, 2024
5a0a4cb
Fix mypy linting issue
Jan 30, 2024
38959ac
Fix mypy linting issue
Jan 30, 2024
17a2821
Remove unused code
Jan 30, 2024
0fd9f5d
Add duckdb to extra installs
Jan 30, 2024
ed85e47
Remove trailing spaces
Jan 30, 2024
fb44e9e
Add duckdb extras
Jan 31, 2024
cf5a279
Add duckdb extra to common tests workflow
Jan 31, 2024
650d0c6
Detect pydantic model in try..catch block
Feb 6, 2024
4f59c19
Swap if branch conditions
Feb 6, 2024
7780f5a
Revert old changes
Feb 6, 2024
18f8858
Revert old changes
Feb 6, 2024
41f93ca
Remove redundant if branches and adjust tests
Feb 6, 2024
091db6d
Revert some changes
Feb 6, 2024
dd8e766
Remove duplicate test
Feb 6, 2024
7bd766c
Enable test only with duckdb available
Feb 6, 2024
8fbe6f8
Return case when explicit complex types for field is expected
Feb 6, 2024
ea969ac
Revert
Feb 6, 2024
d73ff81
Mark test for duckdb only
Feb 6, 2024
91e5553
Remove duplicate import
Feb 6, 2024
ebe6ced
Move tests to pipeline extra
Feb 7, 2024
b7e89f4
Simplify tests
Feb 7, 2024
5acc282
Add unit tests
Feb 7, 2024
879d338
Fix typing issues
Feb 7, 2024
841aafa
Check if type=complex for lists
Feb 7, 2024
2be9de4
Add one more test case
Feb 7, 2024
bac3fac
Add one more test case
Feb 7, 2024
6 changes: 3 additions & 3 deletions .github/workflows/lint.yml
@@ -25,7 +25,7 @@ jobs:
    defaults:
      run:
        shell: bash
-    runs-on: ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}

    steps:

@@ -42,7 +42,7 @@ jobs:
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
-          installer-parallel: true
+          installer-parallel: true

      - name: Load cached venv
        id: cached-poetry-dependencies

@@ -57,7 +57,7 @@ jobs:

      - name: Run make lint
        run: |
-          export PATH=$PATH:"/c/Program Files/usr/bin" # needed for Windows
+          export PATH=$PATH:"/c/Program Files/usr/bin" # needed for Windows
          make lint

      # - name: print envs
2 changes: 1 addition & 1 deletion .github/workflows/test_destination_athena_iceberg.yml
@@ -65,7 +65,7 @@ jobs:

      - name: Install dependencies
        # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction -E --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E --with sentry-sdk --with pipeline

      - name: create secrets.toml
        run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
56 changes: 42 additions & 14 deletions dlt/common/libs/pydantic.py
@@ -14,10 +14,11 @@
)
from typing_extensions import Annotated, get_args, get_origin

-from dlt.common.data_types import py_type_to_sc_type
from dlt.common.exceptions import MissingDependencyException
from dlt.common.schema import DataValidationError
from dlt.common.schema.typing import TSchemaEvolutionMode, TTableSchemaColumns
+from dlt.common.data_types import py_type_to_sc_type
+from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeCaseNamingConvention
from dlt.common.typing import (
    TDataItem,
    TDataItems,
@@ -52,6 +53,9 @@
_TPydanticModel = TypeVar("_TPydanticModel", bound=BaseModel)


+snake_case_naming_convention = SnakeCaseNamingConvention()
+
+
class ListModel(BaseModel, Generic[_TPydanticModel]):
    items: List[_TPydanticModel]

@@ -71,7 +75,7 @@ class DltConfig(TypedDict, total=False):


def pydantic_to_table_schema_columns(
-    model: Union[BaseModel, Type[BaseModel]]
+    model: Union[BaseModel, Type[BaseModel]],
) -> TTableSchemaColumns:
    """Convert a pydantic model to a table schema columns dict

@@ -111,24 +115,47 @@ def pydantic_to_table_schema_columns(

        if is_list_generic_type(inner_type):
            inner_type = list
-        elif is_dict_generic_type(inner_type) or issubclass(inner_type, BaseModel):
+        elif is_dict_generic_type(inner_type):
            inner_type = dict

+        is_inner_type_pydantic_model = False
        name = field.alias or field_name
        try:
            data_type = py_type_to_sc_type(inner_type)
        except TypeError:
-            # try to coerce unknown type to text
-            data_type = "text"
+            if issubclass(inner_type, BaseModel):
+                data_type = "complex"
+                is_inner_type_pydantic_model = True
+            else:
+                # try to coerce unknown type to text
+                data_type = "text"

-        if data_type == "complex" and skip_complex_types:
+        if is_inner_type_pydantic_model and not skip_complex_types:
+            result[name] = {
+                "name": name,
+                "data_type": "complex",
+                "nullable": nullable,
+            }
+        elif is_inner_type_pydantic_model:
+            # This case is for a single field schema/model
+            # we need to generate snake_case field names
+            # and return flattened field schemas
+            schema_hints = pydantic_to_table_schema_columns(field.annotation)
+
+            for field_name, hints in schema_hints.items():
+                schema_key = snake_case_naming_convention.make_path(name, field_name)
+                result[schema_key] = {
+                    **hints,
+                    "name": snake_case_naming_convention.make_path(name, hints["name"]),
+                }
+        elif data_type == "complex" and skip_complex_types:
            continue
-
-        result[name] = {
-            "name": name,
-            "data_type": data_type,
-            "nullable": nullable,
-        }
+        else:
+            result[name] = {
+                "name": name,
+                "data_type": data_type,
+                "nullable": nullable,
+            }

    return result
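
Note: the new branch flattens a nested model into columns of the parent table. A minimal sketch of the resulting schema, based on the tests added in this PR (it assumes the model opts in through its `dlt_config`, as the `DltConfig` hunk above suggests):

```python
from typing import ClassVar, Optional

from pydantic import BaseModel

from dlt.common.libs.pydantic import DltConfig, pydantic_to_table_schema_columns


class Child(BaseModel):
    child_attribute: str
    optional_child_attribute: Optional[str] = None


class Parent(BaseModel):
    child: Child
    # opting in via dlt_config triggers the flattening branch above
    dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}


columns = pydantic_to_table_schema_columns(Parent)
# nested fields are emitted as snake_case "__" paths (see the tests below), e.g.
# columns["child__child_attribute"] == {
#     "name": "child__child_attribute",
#     "data_type": "text",
#     "nullable": False,
# }
```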

Expand Down Expand Up @@ -261,7 +288,8 @@ def create_list_model(
# TODO: use LenientList to create list model that automatically discards invalid items
# https://github.com/pydantic/pydantic/issues/2274 and https://gist.github.com/dmontagu/7f0cef76e5e0e04198dd608ad7219573
return create_model(
"List" + __name__, items=(List[model], ...) # type: ignore[return-value,valid-type]
"List" + __name__,
items=(List[model], ...), # type: ignore[return-value,valid-type]
)


Expand Down
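
For reference, a rough standalone re-creation of what `create_list_model` builds: a wrapper model with an `items` list so a whole batch validates in one call. The real helper lives in `dlt.common.libs.pydantic`; `make_list_model` below is illustrative and simplifies the generated model name:

```python
from typing import List, Type

from pydantic import BaseModel, create_model


def make_list_model(model: Type[BaseModel]) -> Type[BaseModel]:
    # wrap the model in an {"items": [...]} container so that a whole
    # batch of records can be validated with a single call
    return create_model("List" + model.__name__, items=(List[model], ...))  # type: ignore[valid-type]


class Item(BaseModel):
    value: int


ListItem = make_list_model(Item)
batch = ListItem(items=[{"value": 1}, {"value": 2}])  # validates every element
```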
1 change: 1 addition & 0 deletions dlt/common/schema/typing.py
@@ -7,6 +7,7 @@
    Optional,
    Sequence,
    Set,
+    Tuple,
    Type,
    TypedDict,
    NewType,
6 changes: 5 additions & 1 deletion dlt/extract/hints.py
@@ -105,7 +105,11 @@ def compute_table_schema(self, item: TDataItem = None) -> TTableSchema:
        if self._table_name_hint_fun and item is None:
            raise DataItemRequiredForDynamicTableHints(self.name)
        # resolve
-        resolved_template: TResourceHints = {k: self._resolve_hint(item, v) for k, v in table_template.items() if k not in ["incremental", "validator", "original_columns"]}  # type: ignore
+        resolved_template: TResourceHints = {
+            k: self._resolve_hint(item, v)
+            for k, v in table_template.items()
+            if k not in ["incremental", "validator", "original_columns"]
+        }  # type: ignore
        table_schema = self._merge_keys(resolved_template)
        table_schema["resource"] = self.name
        validate_dict_ignoring_xkeys(
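
The reformatted comprehension resolves callable hints against each data item. A short sketch of the behavior this method backs (standard dlt usage; the resource and table names here are illustrative):

```python
import dlt


# table_name given as a callable is a dynamic hint: compute_table_schema
# resolves it per data item, and calling it with item=None raises
# DataItemRequiredForDynamicTableHints
@dlt.resource(table_name=lambda event: event["event_type"])
def events():
    yield {"event_type": "purchase", "value": 42}
    yield {"event_type": "refund", "value": 13}


pipeline = dlt.pipeline(destination="duckdb")
pipeline.run(events())  # loads into tables "purchase" and "refund"
```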
1 change: 0 additions & 1 deletion tests/libs/test_pydantic.py
@@ -10,7 +10,6 @@
    Union,
    Optional,
    List,
-    Dict,
    Any,
)
from typing_extensions import Annotated, get_args, get_origin
154 changes: 154 additions & 0 deletions tests/pipeline/test_pipeline_extra.py
@@ -200,3 +200,157 @@ def generic(start=8):

    pipeline = dlt.pipeline(destination="duckdb")
    pipeline.run(generic(), loader_file_format=file_format)


class Child(BaseModel):
    child_attribute: str
    optional_child_attribute: Optional[str] = None


@pytest.mark.parametrize(
    "destination_config",
    destinations_configs(default_sql_configs=True, subset=["duckdb"]),
    ids=lambda x: x.name,
)
def test_flattens_model_when_skip_complex_types_is_set(
    destination_config: DestinationTestConfiguration,
) -> None:
    class Parent(BaseModel):
        child: Child
        optional_parent_attribute: Optional[str] = None
        dlt_config: ClassVar[DltConfig] = {"skip_complex_types": True}

    example_data = {
        "optional_parent_attribute": None,
        "child": {
            "child_attribute": "any string",
            "optional_child_attribute": None,
        },
    }

    @dlt.resource
    def res():
        yield [example_data]

    @dlt.source(max_table_nesting=1)
    def src():
        yield res()

    p = destination_config.setup_pipeline("example", full_refresh=True)
    p.run(src(), table_name="items", columns=Parent)

    with p.sql_client() as client:
        with client.execute_query("SELECT * FROM items") as cursor:
            loaded_values = {
                col[0]: val
                for val, col in zip(cursor.fetchall()[0], cursor.description)
                if col[0] not in ("_dlt_id", "_dlt_load_id")
            }
            assert loaded_values == {
                "child__child_attribute": "any string",
                "child__optional_child_attribute": None,
                "optional_parent_attribute": None,
            }

    keys = p.default_schema.tables["items"]["columns"].keys()
    columns = p.default_schema.tables["items"]["columns"]

    assert keys == {
        "child__child_attribute",
        "child__optional_child_attribute",
        "optional_parent_attribute",
        "_dlt_load_id",
        "_dlt_id",
    }

    assert columns["child__child_attribute"] == {
        "name": "child__child_attribute",
        "data_type": "text",
        "nullable": False,
    }

    assert columns["child__optional_child_attribute"] == {
        "name": "child__optional_child_attribute",
        "data_type": "text",
        "nullable": True,
    }

    assert columns["optional_parent_attribute"] == {
        "name": "optional_parent_attribute",
        "data_type": "text",
        "nullable": True,
    }
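
The `child__child_attribute` keys asserted above come from the snake_case naming convention's path join, the same `make_path` call used in `pydantic_to_table_schema_columns`. A minimal sketch (assuming `make_path` joins identifiers with the `__` separator):

```python
from dlt.common.normalizers.naming.snake_case import NamingConvention

naming = NamingConvention()
# make_path joins parent and child field names with the "__" path separator
assert naming.make_path("child", "child_attribute") == "child__child_attribute"
```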


@pytest.mark.parametrize(
    "destination_config",
    destinations_configs(default_sql_configs=True, subset=["duckdb"]),
    ids=lambda x: x.name,
)
def test_flattens_model_when_skip_complex_types_is_not_set(
    destination_config: DestinationTestConfiguration,
):
    class Parent(BaseModel):
        child: Child
        optional_parent_attribute: Optional[str] = None
        data_dictionary: Dict[str, Any] = None
        dlt_config: ClassVar[DltConfig] = {"skip_complex_types": False}

    example_data = {
        "optional_parent_attribute": None,
        "data_dictionary": {
            "child_attribute": "any string",
        },
        "child": {
            "child_attribute": "any string",
            "optional_child_attribute": None,
        },
    }

    @dlt.resource
    def res():
        yield [example_data]

    @dlt.source(max_table_nesting=1)
    def src():
        yield res()

    p = destination_config.setup_pipeline("example", full_refresh=True)
    p.run(src(), table_name="items", columns=Parent)

    with p.sql_client() as client:
        with client.execute_query("SELECT * FROM items") as cursor:
            loaded_values = {
                col[0]: val
                for val, col in zip(cursor.fetchall()[0], cursor.description)
                if col[0] not in ("_dlt_id", "_dlt_load_id")
            }

            assert loaded_values == {
                "child": '{"child_attribute":"any string","optional_child_attribute":null}',
                "optional_parent_attribute": None,
                "data_dictionary": '{"child_attribute":"any string"}',
            }

    keys = p.default_schema.tables["items"]["columns"].keys()
    assert keys == {
        "child",
        "optional_parent_attribute",
        "data_dictionary",
        "_dlt_load_id",
        "_dlt_id",
    }

    columns = p.default_schema.tables["items"]["columns"]

    assert columns["optional_parent_attribute"] == {
        "name": "optional_parent_attribute",
        "data_type": "text",
        "nullable": True,
    }

    assert columns["data_dictionary"] == {
        "name": "data_dictionary",
        "data_type": "complex",
        "nullable": False,
    }