Skip to content

Commit

Permalink
🌳 Fix Null Checks for sub objects (#87)
Browse files Browse the repository at this point in the history
Fix Null Checks for sub objects 

Co-authored-by: asim <[email protected]>
  • Loading branch information
awtkns and asim-shrestha authored Nov 13, 2024
1 parent 39f29a7 commit cf9cdad
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 32 deletions.
49 changes: 26 additions & 23 deletions core/harambe_core/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ def validate(self, data: dict[str, Any], base_url: str) -> dict[str, Any]:
self.field_types = self._get_field_types(base_url)
model = self._schema_to_pydantic_model(self.schema)

if self._all_fields_empty(data):
raise SchemaValidationError(
message="All fields are null or empty.",
)
try:
res = model(**data).model_dump()
if self._pk_expression:
Expand Down Expand Up @@ -245,25 +241,6 @@ def _get_type(self, field: SchemaFieldType, required: bool | None) -> Type[Any]:
field_type = Optional[field_type]
return field_type

def _all_fields_empty(self, data: dict[str, Any]) -> bool:
"""
Recursively check if all fields in the data are either None or empty.
This includes handling nested dictionaries and lists.
"""

def is_empty(value: Any) -> bool:
if value is None:
return True
if isinstance(value, dict):
return all(is_empty(v) for v in value.values())
if isinstance(value, list):
return all(is_empty(v) for v in value)
if isinstance(value, str):
return not value.strip()
return False

return all(is_empty(value) for value in data.values())


def base_model_factory(
config: ConfigDict, computed_fields: dict[str, str], evaluator: ExpressionEvaluator
Expand Down Expand Up @@ -304,6 +281,32 @@ def evaluate_computed_fields(self) -> Self:
for field, expression in computed_fields.items():
res = evaluator.evaluate(expression, self)
setattr(self, field, res)

if _all_fields_empty(self.model_dump()):
raise SchemaValidationError(
message="All fields are null or empty.",
)

return self

return PreValidatedBaseModel


def _all_fields_empty(data: dict[str, Any]) -> bool:
"""
Recursively check if all fields in the data are either None or empty.
This includes handling nested dictionaries and lists.
"""

def is_empty(value: Any) -> bool:
if value is None:
return True
if isinstance(value, dict):
return all(is_empty(v) for v in value.values())
if isinstance(value, list):
return all(is_empty(v) for v in value)
if isinstance(value, str):
return not value.strip()
return False

return all(is_empty(value) for value in data.values())
2 changes: 1 addition & 1 deletion core/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "harambe-core"
version = "0.50.0"
version = "0.50.1"
description = "Core types for harambe SDK 🐒🍌"
authors = [
{ name = "Adam Watkins", email = "[email protected]" }
Expand Down
71 changes: 70 additions & 1 deletion core/test/parser/test_null_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_pydantic_schema_validation_error_fail(data: Dict[str, Any]) -> None:
"code_type": "",
"code": "",
"code_description": "",
"description": "",
"description": "Something",
}
],
},
Expand All @@ -127,3 +127,72 @@ def test_pydantic_schema_validation_error_fail(data: Dict[str, Any]) -> None:
def test_pydantic_schema_validation_success(data: Dict[str, Any]):
validator = SchemaParser(government_contracts)
validator.validate(data, base_url="http://example.com")


@pytest.mark.parametrize(
    "data",
    [
        # A member object with no keys at all.
        {"group": "Team", "members": [{}]},
        # A single member whose fields are all blank / null.
        {
            "group": "Team",
            "members": [
                {
                    "name": "",
                    "age": None,
                },
            ],
        },
        # One populated member followed by an entirely empty one.
        {
            "group": "Team",
            "members": [
                {
                    "name": "Adam",
                    "age": 29,
                },
                {
                    "name": "",
                    "age": None,
                },
            ],
        },
    ],
)
def test_with_emtpy_objects(data):
    """Validation must fail when any nested member object is entirely empty."""
    schema = {
        "group": {"type": "string"},
        "members": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "age": {"type": "integer"},
                },
            },
        },
    }

    # Build the parser outside ``pytest.raises`` so that only an error raised
    # by ``validate`` itself can satisfy the expectation.
    validator = SchemaParser(schema)
    with pytest.raises(SchemaValidationError):
        validator.validate(data, base_url="http://example.com")


@pytest.mark.parametrize(
    "strings",
    [
        # Each list is its own test case. Wrapping them all in one tuple
        # would hand the test a single tuple-of-lists argument instead.
        ["", None, None],
        [None, None],
        ["a", " "],
        ["a", "b", "c", ""],
    ],
)
def test_with_empty_literals(strings):
    """Validation must fail for each of the parametrized literal arrays."""
    # NOTE(review): items are declared as "integer" although the cases carry
    # strings / None; confirm whether "string" was intended here.
    schema = {
        "strings": {"type": "array", "items": {"type": "integer"}},
    }

    # Build the parser outside ``pytest.raises`` so that only an error raised
    # by ``validate`` itself can satisfy the expectation.
    validator = SchemaParser(schema)
    with pytest.raises(SchemaValidationError):
        validator.validate({"strings": strings}, base_url="http://example.com")
4 changes: 2 additions & 2 deletions core/test/parser/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
(
load_schema("contact"),
{
"name": {"first_name": None, "last_name": None},
"address": {"street": None, "city": None, "zip": None},
"name": {"first_name": "Adam", "last_name": None},
"address": {"street": None, "city": None, "zip": "9104"},
"phone_numbers": [{"type": "mobile", "number": "+1 (628) 555-3456"}],
},
),
Expand Down
2 changes: 1 addition & 1 deletion core/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions sdk/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[project]
name = "harambe-sdk"
version = "0.50.0"
version = "0.50.1"
description = "Data extraction SDK for Playwright 🐒🍌"
authors = [
{ name = "Adam Watkins", email = "[email protected]" }
]
requires-python = ">=3.11,<4.0"
readme = "README.md"
dependencies = [
"harambe_core==0.50.0",
"harambe_core==0.50.1",
"pydantic==2.9.2",
"playwright==1.47.0",
"setuptools==73.0.0",
Expand Down
4 changes: 2 additions & 2 deletions sdk/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit cf9cdad

Please sign in to comment.