-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add table schema validator #125
base: main
Are you sure you want to change the base?
Changes from all commits
73823fa
ad1c9ae
801fdef
cdd61cd
7d93860
279586f
aff62f2
d0bf9d2
e633ee1
44488aa
8113d43
4fae509
051cd3e
2c8f968
1eb2069
6fb61e1
3a69368
2f845c8
51272b7
cb148bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ jobs: | |
uses: ./.github/workflows/ci_template.yml | ||
with: | ||
os: '["ubuntu-latest", "windows-latest", "macos-latest"]' | ||
python-version: '["3.9", "3.10", "3.11", "3.12"]' | ||
python-version: '["3.10", "3.11", "3.12", "3.13"]' | ||
|
||
build-wheel: | ||
needs: test | ||
|
@@ -37,7 +37,7 @@ jobs: | |
|
||
- uses: actions/setup-python@v5 | ||
with: | ||
python-version: 3.9 | ||
python-version: "3.10"  # quoted on purpose: a bare 3.10 is parsed by YAML as the float 3.1
|
||
- name: Install Poetry | ||
uses: abatilo/[email protected] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
"""Validators for the CSVY format.""" | ||
|
||
from collections.abc import Mapping | ||
from typing import Any | ||
|
||
from pydantic import BaseModel | ||
|
||
from .csv_dialect import CSVDialectValidator # noqa: F401 | ||
from .registry import VALIDATORS_REGISTRY, register_validator # noqa: F401 | ||
from .table_schema import SchemaValidator # noqa: F401 | ||
|
||
|
||
def validate_header(header: dict[str, Any]) -> dict[str, Any]:
    """Validate the header entries that have a registered validator.

    Each key of the header is looked up in ``VALIDATORS_REGISTRY``. When a validator
    is registered for that key, the corresponding value is unpacked as keyword
    arguments into the validator and the resulting Pydantic model is stored in the
    output. Entries without a registered validator are copied to the output.

    Values that are already Pydantic models are first dumped back to dictionaries
    with ``model_dump()``. For registered keys this means the model is rebuilt and
    validated again, which catches attributes that were changed to invalid values
    after the first validation.

    Args:
        header: The header of the CSVY file.

    Returns:
        A new dictionary with the same keys, where values of registered keys are
        instances of their validator.

    Raises:
        TypeError: If the value of a registered key is not a mapping.

    """
    result: dict[str, Any] = {}
    for name, entry in header.items():
        # Models are always flattened first, whether or not the key is registered.
        plain = entry.model_dump() if isinstance(entry, BaseModel) else entry

        if name not in VALIDATORS_REGISTRY:
            result[name] = plain
            continue

        # The validator is called with **kwargs, so only mappings are acceptable.
        if not isinstance(plain, Mapping):
            raise TypeError(
                f"Value for '{name}' must be a mapping, not a '{type(plain)}'."
            )

        model_cls = VALIDATORS_REGISTRY[name]
        result[name] = model_cls(**plain)
    return result
|
||
|
||
def header_to_dict(header: dict[str, Any]) -> dict[str, Any]:
    """Convert a header into a plain dictionary that can be saved as yaml.

    Every top-level value that is a Pydantic model is replaced by the output of its
    ``model_dump()``. All other values are passed through unchanged. Only the top
    level of the header is inspected.

    Args:
        header: Dictionary to be saved as the header of the CSVY file.

    Returns:
        A new dictionary with the same keys and with Pydantic models replaced by
        their dumped dictionaries.

    """
    return {
        name: entry.model_dump() if isinstance(entry, BaseModel) else entry
        for name, entry in header.items()
    }
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
"""Registry of validators to run on the header.""" | ||
|
||
from collections.abc import Callable | ||
|
||
from pydantic import BaseModel | ||
|
||
VALIDATORS_REGISTRY: dict[str, type[BaseModel]] = {} | ||
"""Registry of validators to run on the header.""" | ||
|
||
|
||
def register_validator(
    name: str, overwrite: bool = False
) -> Callable[[type[BaseModel]], type[BaseModel]]:
    """Create a class decorator that stores a validator in the registry.

    The decorated class is saved in ``VALIDATORS_REGISTRY`` under ``name`` and is
    returned unchanged, so it remains usable as a regular class after registration.

    Args:
        name: The key under which the validator is stored in the registry.
        overwrite: Whether to replace a validator already registered under ``name``
            instead of raising an error.

    Returns:
        The decorator function that registers the validator. When applied, it
        raises ``TypeError`` if the class is not a subclass of
        ``pydantic.BaseModel`` and ``ValueError`` if ``name`` is already taken and
        ``overwrite`` is false.

    """

    # A plain closure is enough here: the decorator only records the class and
    # hands it back unchanged.
    def _register(cls: type[BaseModel]) -> type[BaseModel]:
        if not issubclass(cls, BaseModel):
            raise TypeError("Validators must be subclasses of pydantic.BaseModel.")

        already_registered = name in VALIDATORS_REGISTRY
        if already_registered and not overwrite:
            raise ValueError(f"Validator with name '{name}' already exists.")

        VALIDATORS_REGISTRY[name] = cls
        return cls

    return _register
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be clearer to explicitly put the default value here, e.g.:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, according to the specification, the default is for it not to be set: https://specs.frictionlessdata.io/csv-dialect/#specification
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And now that I check the specification, I'm missing several fields... 😢