Skip to content

Commit

Permalink
Pydantic json type, test special string types
Browse files Browse the repository at this point in the history
  • Loading branch information
steinitzu committed Sep 15, 2023
1 parent cf28907 commit 7cec42e
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
4 changes: 4 additions & 0 deletions dlt/common/libs/pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def pydantic_to_table_schema_columns(model: Union[BaseModel, Type[BaseModel]], s
fields = model.__fields__
for field_name, field in fields.items():
annotation = field.annotation
if inner_annotation := getattr(annotation, 'inner_type', None):
# This applies to pydantic.Json fields: the inner type is the type of the value after JSON parsing.
# (In pydantic 2 the outer annotation is already the final type.)
annotation = inner_annotation
nullable = is_optional_type(annotation)

if is_union(annotation):
Expand Down
6 changes: 4 additions & 2 deletions dlt/extract/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,10 @@ def resource(
write_disposition (Literal["skip", "append", "replace", "merge"], optional): Controls how to write data to a table. `append` will always add new data at the end of the table. `replace` will replace existing data with new data. `skip` will prevent data from loading. `merge` will deduplicate and merge data based on "primary_key" and "merge_key" hints. Defaults to "append".
This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes.
columns (Sequence[TAnySchemaColumns], optional): A list, dict or pydantic model of column schemas. Typed dictionary describing column names, data types, write disposition and performance hints that gives you full control over the created table schema.
This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes.
columns (Sequence[TAnySchemaColumns], optional): A list, dict or pydantic model of column schemas.
Typed dictionary describing column names, data types, write disposition and performance hints that gives you full control over the created table schema.
This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes.
When the argument is a pydantic model, the model will be used to validate the data yielded by the resource as well.
primary_key (str | Sequence[str]): A column name or a list of column names that comprise a primary key. Typically used with the "merge" write disposition to deduplicate loaded data.
This argument also accepts a callable that is used to dynamically create tables for stream-like resources yielding many datatypes.
Expand Down
12 changes: 11 additions & 1 deletion tests/common/test_pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from datetime import datetime, date, time # noqa: I251
from dlt.common import Decimal
from dlt.common import json

from pydantic import BaseModel
from pydantic import BaseModel, Json, AnyHttpUrl
from dlt.common.libs.pydantic import pydantic_to_table_schema_columns


Expand Down Expand Up @@ -56,6 +57,10 @@ class Model(BaseModel):
mixed_enum_int_field: MixedEnum
mixed_enum_str_field: MixedEnum

json_field: Json[List[str]]

url_field: AnyHttpUrl


@pytest.mark.parametrize('instance', [True, False])
def test_pydantic_model_to_columns(instance: bool) -> None:
Expand All @@ -74,6 +79,8 @@ def test_pydantic_model_to_columns(instance: bool) -> None:
int_enum_field=IntEnum.a,
mixed_enum_int_field=MixedEnum.a_int,
mixed_enum_str_field=MixedEnum.b_str,
json_field=json.dumps(["a", "b", "c"]), # type: ignore[arg-type]
url_field="https://example.com"
)
else:
model = Model # type: ignore[assignment]
Expand All @@ -98,6 +105,8 @@ def test_pydantic_model_to_columns(instance: bool) -> None:
assert result['int_enum_field']['data_type'] == 'bigint'
assert result['mixed_enum_int_field']['data_type'] == 'text'
assert result['mixed_enum_str_field']['data_type'] == 'text'
assert result['json_field']['data_type'] == 'complex'
assert result['url_field']['data_type'] == 'text'


def test_pydantic_model_skip_complex_types() -> None:
Expand All @@ -109,6 +118,7 @@ def test_pydantic_model_skip_complex_types() -> None:
assert "list_field" not in result
assert "blank_dict_field" not in result
assert "parametrized_dict_field" not in result
assert "json_field" not in result
assert result["bigint_field"]["data_type"] == "bigint"
assert result["text_field"]["data_type"] == "text"
assert result["timestamp_field"]["data_type"] == "timestamp"

0 comments on commit 7cec42e

Please sign in to comment.