From cf9cdad8c1c41950070f2ce7f115494726a751fd Mon Sep 17 00:00:00 2001 From: Adam Watkins Date: Wed, 13 Nov 2024 12:31:15 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=B3=20Fix=20Null=20Checks=20for=20sub?= =?UTF-8?q?=20objects=20(#87)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix Null Checks for sub objects Co-authored-by: asim <50181239+asim-shrestha@users.noreply.github.com> --- core/harambe_core/parser/parser.py | 49 ++++++++++--------- core/pyproject.toml | 2 +- core/test/parser/test_null_values.py | 71 +++++++++++++++++++++++++++- core/test/parser/test_parser.py | 4 +- core/uv.lock | 2 +- sdk/pyproject.toml | 4 +- sdk/uv.lock | 4 +- 7 files changed, 104 insertions(+), 32 deletions(-) diff --git a/core/harambe_core/parser/parser.py b/core/harambe_core/parser/parser.py index f929562..974dadf 100644 --- a/core/harambe_core/parser/parser.py +++ b/core/harambe_core/parser/parser.py @@ -51,10 +51,6 @@ def validate(self, data: dict[str, Any], base_url: str) -> dict[str, Any]: self.field_types = self._get_field_types(base_url) model = self._schema_to_pydantic_model(self.schema) - if self._all_fields_empty(data): - raise SchemaValidationError( - message="All fields are null or empty.", - ) try: res = model(**data).model_dump() if self._pk_expression: @@ -245,25 +241,6 @@ def _get_type(self, field: SchemaFieldType, required: bool | None) -> Type[Any]: field_type = Optional[field_type] return field_type - def _all_fields_empty(self, data: dict[str, Any]) -> bool: - """ - Recursively check if all fields in the data are either None or empty. - This includes handling nested dictionaries and lists. - """ - - def is_empty(value: Any) -> bool: - if value is None: - return True - if isinstance(value, dict): - return all(is_empty(v) for v in value.values()) - if isinstance(value, list): - return all(is_empty(v) for v in value) - if isinstance(value, str): - return not value.strip() - return False - - return all(is_empty(value) for value in data.values()) - def base_model_factory( config: ConfigDict, computed_fields: dict[str, str], evaluator: ExpressionEvaluator @@ -304,6 +281,32 @@ def evaluate_computed_fields(self) -> Self: for field, expression in computed_fields.items(): res = evaluator.evaluate(expression, self) setattr(self, field, res) + + if _all_fields_empty(self.model_dump()): + raise SchemaValidationError( + message="All fields are null or empty.", + ) + return self return PreValidatedBaseModel + + +def _all_fields_empty(data: dict[str, Any]) -> bool: + """ + Recursively check if all fields in the data are either None or empty. + This includes handling nested dictionaries and lists. + """ + + def is_empty(value: Any) -> bool: + if value is None: + return True + if isinstance(value, dict): + return all(is_empty(v) for v in value.values()) + if isinstance(value, list): + return all(is_empty(v) for v in value) + if isinstance(value, str): + return not value.strip() + return False + + return all(is_empty(value) for value in data.values()) diff --git a/core/pyproject.toml b/core/pyproject.toml index 9b11629..0c4e28d 100644 --- a/core/pyproject.toml +++ b/core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harambe-core" -version = "0.50.0" +version = "0.50.1" description = "Core types for harambe SDK 🐒🍌" authors = [ { name = "Adam Watkins", email = "adam@reworkd.ai" } diff --git a/core/test/parser/test_null_values.py b/core/test/parser/test_null_values.py index 38a87af..43ed358 100644 --- a/core/test/parser/test_null_values.py +++ b/core/test/parser/test_null_values.py @@ -118,7 +118,7 @@ def test_pydantic_schema_validation_error_fail(data: Dict[str, Any]) -> None: "code_type": "", "code": "", "code_description": "", - "description": "", + "description": "Something", } ], }, @@ -127,3 +127,72 @@ def test_pydantic_schema_validation_error_fail(data: Dict[str, Any]) -> None: def test_pydantic_schema_validation_success(data: Dict[str, Any]): validator = SchemaParser(government_contracts) validator.validate(data, base_url="http://example.com") + + +@pytest.mark.parametrize( + "data", + [ + {"group": "Team", "members": [{}]}, + { + "group": "Team", + "members": [ + { + "name": "", + "age": None, + }, + ], + }, + { + "group": "Team", + "members": [ + { + "name": "Adam", + "age": 29, + }, + { + "name": "", + "age": None, + }, + ], + }, + ], +) +def test_with_emtpy_objects(data): + schema = { + "group": {"type": "string"}, + "members": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer"}, + }, + }, + }, + } + + with pytest.raises(SchemaValidationError): + validator = SchemaParser(schema) + validator.validate(data, base_url="http://example.com") + + +@pytest.mark.parametrize( + "strings", + [ + ( + ["", None, None], + [None, None], + ["a", " "], + ["a", "b", "c", ""], + ) + ], +) +def test_with_empty_literals(strings): + schema = { + "strings": {"type": "array", "items": {"type": "integer"}}, + } + + with pytest.raises(SchemaValidationError): + validator = SchemaParser(schema) + validator.validate({"strings": strings}, base_url="http://example.com") diff --git a/core/test/parser/test_parser.py b/core/test/parser/test_parser.py index 08dfe37..fc21250 100644 --- a/core/test/parser/test_parser.py +++ b/core/test/parser/test_parser.py @@ -37,8 +37,8 @@ ( load_schema("contact"), { - "name": {"first_name": None, "last_name": None}, - "address": {"street": None, "city": None, "zip": None}, + "name": {"first_name": "Adam", "last_name": None}, + "address": {"street": None, "city": None, "zip": "9104"}, "phone_numbers": [{"type": "mobile", "number": "+1 (628) 555-3456"}], }, ), diff --git a/core/uv.lock b/core/uv.lock index 06f920c..c5889c0 100644 --- a/core/uv.lock +++ b/core/uv.lock @@ -141,7 +141,7 @@ wheels = [ [[package]] name = "harambe-core" -version = "0.50.0" +version = "0.50.1" source = { virtual = "." } dependencies = [ { name = "dateparser" }, diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index aeeac41..07cdc64 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "harambe-sdk" -version = "0.50.0" +version = "0.50.1" description = "Data extraction SDK for Playwright 🐒🍌" authors = [ { name = "Adam Watkins", email = "adam@reworkd.ai" } @@ -8,7 +8,7 @@ authors = [ requires-python = ">=3.11,<4.0" readme = "README.md" dependencies = [ - "harambe_core==0.50.0", + "harambe_core==0.50.1", "pydantic==2.9.2", "playwright==1.47.0", "setuptools==73.0.0", diff --git a/sdk/uv.lock b/sdk/uv.lock index 2073d14..5194ec9 100644 --- a/sdk/uv.lock +++ b/sdk/uv.lock @@ -428,7 +428,7 @@ wheels = [ [[package]] name = "harambe-core" -version = "0.50.0" +version = "0.50.1" source = { editable = "../core" } dependencies = [ { name = "dateparser" }, @@ -461,7 +461,7 @@ dev = [ [[package]] name = "harambe-sdk" -version = "0.50.0" +version = "0.50.1" source = { virtual = "." } dependencies = [ { name = "aiohttp" },