diff --git a/dlt/common/libs/pydantic.py b/dlt/common/libs/pydantic.py index 057cbb57d5..2df85638d1 100644 --- a/dlt/common/libs/pydantic.py +++ b/dlt/common/libs/pydantic.py @@ -26,6 +26,10 @@ def pydantic_to_table_schema_columns(model: Union[BaseModel, Type[BaseModel]], s fields = model.__fields__ for field_name, field in fields.items(): annotation = field.annotation + if inner_annotation := getattr(annotation, 'inner_type', None): + # This applies to pydantic.Json fields, the inner type is the type after json parsing + # (In pydantic 2 the outer annotation is the final type) + annotation = inner_annotation nullable = is_optional_type(annotation) if is_union(annotation): diff --git a/dlt/extract/decorators.py b/dlt/extract/decorators.py index 14003a2612..e769cc74e4 100644 --- a/dlt/extract/decorators.py +++ b/dlt/extract/decorators.py @@ -282,8 +282,10 @@ def resource( write_disposition (Literal["skip", "append", "replace", "merge"], optional): Controls how to write data to a table. `append` will always add new data at the end of the table. `replace` will replace existing data with new data. `skip` will prevent data from loading. "merge" will deduplicate and merge data based on "primary_key" and "merge_key" hints. Defaults to "append". This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. - columns (Sequence[TAnySchemaColumns], optional): A list, dict or pydantic model of column schemas. Typed dictionary describing column names, data types, write disposition and performance hints that gives you full control over the created table schema. - This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. + columns (Sequence[TAnySchemaColumns], optional): A list, dict or pydantic model of column schemas. 
+ Typed dictionary describing column names, data types, write disposition and performance hints that gives you full control over the created table schema. + This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. + When the argument is a pydantic model, the model will be used to validate the data yielded by the resource as well. primary_key (str | Sequence[str]): A column name or a list of column names that comprise a primary key. Typically used with "merge" write disposition to deduplicate loaded data. This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes. diff --git a/tests/common/test_pydantic.py b/tests/common/test_pydantic.py index 3a9056350a..bb480b8b9e 100644 --- a/tests/common/test_pydantic.py +++ b/tests/common/test_pydantic.py @@ -4,8 +4,9 @@ from datetime import datetime, date, time # noqa: I251 from dlt.common import Decimal +from dlt.common import json -from pydantic import BaseModel +from pydantic import BaseModel, Json, AnyHttpUrl from dlt.common.libs.pydantic import pydantic_to_table_schema_columns @@ -56,6 +57,10 @@ class Model(BaseModel): mixed_enum_int_field: MixedEnum mixed_enum_str_field: MixedEnum + json_field: Json[List[str]] + + url_field: AnyHttpUrl + @pytest.mark.parametrize('instance', [True, False]) def test_pydantic_model_to_columns(instance: bool) -> None: @@ -74,6 +79,8 @@ def test_pydantic_model_to_columns(instance: bool) -> None: int_enum_field=IntEnum.a, mixed_enum_int_field=MixedEnum.a_int, mixed_enum_str_field=MixedEnum.b_str, + json_field=json.dumps(["a", "b", "c"]), # type: ignore[arg-type] + url_field="https://example.com" ) else: model = Model # type: ignore[assignment] @@ -98,6 +105,8 @@ def test_pydantic_model_to_columns(instance: bool) -> None: assert result['int_enum_field']['data_type'] == 'bigint' assert result['mixed_enum_int_field']['data_type'] == 'text' assert 
result['mixed_enum_str_field']['data_type'] == 'text' + assert result['json_field']['data_type'] == 'complex' + assert result['url_field']['data_type'] == 'text' def test_pydantic_model_skip_complex_types() -> None: @@ -109,6 +118,7 @@ def test_pydantic_model_skip_complex_types() -> None: assert "list_field" not in result assert "blank_dict_field" not in result assert "parametrized_dict_field" not in result + assert "json_field" not in result assert result["bigint_field"]["data_type"] == "bigint" assert result["text_field"]["data_type"] == "text" assert result["timestamp_field"]["data_type"] == "timestamp"