From 9cf59eea9c9e20ee1b551abc8c4f081fb3d7eea8 Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Tue, 19 Sep 2023 13:10:54 -0400 Subject: [PATCH] Subclass validator from ItemTransform --- dlt/extract/schema.py | 4 ++-- dlt/extract/source.py | 12 ++++++------ dlt/extract/typing.py | 5 +---- dlt/extract/validation.py | 6 +++--- tests/extract/test_validation.py | 4 ++-- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/dlt/extract/schema.py b/dlt/extract/schema.py index 1eac5624c3..80e9f6f32f 100644 --- a/dlt/extract/schema.py +++ b/dlt/extract/schema.py @@ -9,7 +9,7 @@ from dlt.common.validation import validate_dict_ignoring_xkeys from dlt.extract.incremental import Incremental -from dlt.extract.typing import TFunHintTemplate, TTableHintTemplate, ColumnValidator +from dlt.extract.typing import TFunHintTemplate, TTableHintTemplate, ValidateItem from dlt.extract.exceptions import DataItemRequiredForDynamicTableHints, InconsistentTableTemplate, TableNameMissing from dlt.extract.utils import ensure_table_schema_columns, ensure_table_schema_columns_hint from dlt.extract.validation import get_column_validator @@ -25,7 +25,7 @@ class TTableSchemaTemplate(TypedDict, total=False): primary_key: TTableHintTemplate[TColumnNames] merge_key: TTableHintTemplate[TColumnNames] incremental: Incremental[Any] - validator: ColumnValidator + validator: ValidateItem class DltResourceSchema: diff --git a/dlt/extract/source.py b/dlt/extract/source.py index 1724ecf233..39dcfc762c 100644 --- a/dlt/extract/source.py +++ b/dlt/extract/source.py @@ -17,7 +17,7 @@ from dlt.common.pipeline import PipelineContext, StateInjectableContext, SupportsPipelineRun, resource_state, source_state, pipeline_state from dlt.common.utils import graph_find_scc_nodes, flatten_list_or_items, get_callable_name, graph_edges_to_nodes, multi_context_manager, uniq_id -from dlt.extract.typing import DataItemWithMeta, ItemTransformFunc, ItemTransformFunctionWithMeta, TDecompositionStrategy, TableNameMeta, FilterItem, MapItem, YieldMapItem, ColumnValidator +from dlt.extract.typing import DataItemWithMeta, ItemTransformFunc, ItemTransformFunctionWithMeta, TDecompositionStrategy, TableNameMeta, FilterItem, MapItem, YieldMapItem, ValidateItem from dlt.extract.pipe import Pipe, ManagedPipeIterator, TPipeStep from dlt.extract.schema import DltResourceSchema, TTableSchemaTemplate from dlt.extract.incremental import Incremental, IncrementalResourceWrapper @@ -136,18 +136,18 @@ def incremental(self) -> IncrementalResourceWrapper: return incremental @property - def validator(self) -> Optional[ColumnValidator]: + def validator(self) -> Optional[ValidateItem]: """Gets validator transform if it is in the pipe""" - validator: ColumnValidator = None - step_no = self._pipe.find(ColumnValidator) + validator: ValidateItem = None + step_no = self._pipe.find(ValidateItem) if step_no >= 0: validator = self._pipe.steps[step_no] # type: ignore[assignment] return validator @validator.setter - def validator(self, validator: Optional[ColumnValidator]) -> None: + def validator(self, validator: Optional[ValidateItem]) -> None: """Add/remove or replace the validator in pipe""" - step_no = self._pipe.find(ColumnValidator) + step_no = self._pipe.find(ValidateItem) if step_no >= 0: self._pipe.remove_step(step_no) if validator: diff --git a/dlt/extract/typing.py b/dlt/extract/typing.py index 62bbf88df9..5f32556f92 100644 --- a/dlt/extract/typing.py +++ b/dlt/extract/typing.py @@ -126,12 +126,9 @@ def __call__(self, item: TDataItems, meta: Any = None) -> Optional[TDataItems]: yield from self._f(item) -class ColumnValidator(ABC): +class ValidateItem(ItemTransform[TDataItem]): """Base class for validators of data items. Subclass should implement the `__call__` method to either return the data item(s) or raise `extract.exceptions.ValidationError`. See `PydanticValidator` for possible implementation. """ - @abstractmethod - def __call__(self, item: TDataItem, meta: Any = None) -> TDataItem: - ... diff --git a/dlt/extract/validation.py b/dlt/extract/validation.py index 58b8de65c7..0a29bf107f 100644 --- a/dlt/extract/validation.py +++ b/dlt/extract/validation.py @@ -8,13 +8,13 @@ from dlt.extract.exceptions import ValidationError from dlt.common.typing import TDataItems from dlt.common.schema.typing import TAnySchemaColumns -from dlt.extract.typing import TTableHintTemplate, ColumnValidator +from dlt.extract.typing import TTableHintTemplate, ValidateItem _TPydanticModel = TypeVar("_TPydanticModel", bound=PydanticBaseModel) -class PydanticValidator(ColumnValidator, Generic[_TPydanticModel]): +class PydanticValidator(ValidateItem, Generic[_TPydanticModel]): model: Type[_TPydanticModel] def __init__(self, model: Type[_TPydanticModel]) -> None: self.model = model @@ -37,7 +37,7 @@ def __call__(self, item: TDataItems, meta: Any = None) -> Union[_TPydanticModel, raise ValidationError(e) from e -def get_column_validator(columns: TTableHintTemplate[TAnySchemaColumns]) -> Optional[ColumnValidator]: +def get_column_validator(columns: TTableHintTemplate[TAnySchemaColumns]) -> Optional[ValidateItem]: if PydanticBaseModel is not None and isinstance(columns, type) and issubclass(columns, PydanticBaseModel): return PydanticValidator(columns) return None diff --git a/tests/extract/test_validation.py b/tests/extract/test_validation.py index 26e3d8f90b..855bed4326 100644 --- a/tests/extract/test_validation.py +++ b/tests/extract/test_validation.py @@ -4,7 +4,7 @@ import pytest import dlt -from dlt.extract.typing import ColumnValidator +from dlt.extract.typing import ValidateItem from dlt.common.typing import TDataItems from dlt.extract.validation import PydanticValidator from dlt.extract.exceptions import ValidationError, ResourceExtractionError @@ -100,7 +100,7 @@ class AnotherModel(BaseModel): steps = resource._pipe.steps assert len(steps) == 2 - assert isinstance(steps[-1], ColumnValidator) + assert isinstance(steps[-1], ValidateItem) assert steps[-1].model is AnotherModel