From 444069b27028cd9eb7ca66fbde04c127468f936c Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 15 Jan 2025 13:08:57 -0600 Subject: [PATCH 1/7] Add column groups to the data model --- python/felis/datamodel.py | 137 ++++++++++++++++++++++++++++++++++++-- tests/data/sales.yaml | 8 +++ tests/test_datamodel.py | 71 ++++++++++++++++++++ 3 files changed, 210 insertions(+), 6 deletions(-) diff --git a/python/felis/datamodel.py b/python/felis/datamodel.py index 53ccbf2c..25f3dc8a 100644 --- a/python/felis/datamodel.py +++ b/python/felis/datamodel.py @@ -134,6 +134,32 @@ class DataType(StrEnum): timestamp = auto() +def validate_ivoa_ucd(ivoa_ucd: str) -> str: + """Validate IVOA UCD values. + + Parameters + ---------- + ivoa_ucd + IVOA UCD value to check. + + Returns + ------- + `str` + The IVOA UCD value if it is valid. + + Raises + ------ + ValueError + If the IVOA UCD value is invalid. + """ + if ivoa_ucd is not None: + try: + ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd) + except ValueError as e: + raise ValueError(f"Invalid IVOA UCD: {e}") + return ivoa_ucd + + class Column(BaseObject): """Column model.""" @@ -235,12 +261,7 @@ def check_ivoa_ucd(cls, ivoa_ucd: str) -> str: `str` The IVOA UCD value if it is valid. """ - if ivoa_ucd is not None: - try: - ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd) - except ValueError as e: - raise ValueError(f"Invalid IVOA UCD: {e}") - return ivoa_ucd + return validate_ivoa_ucd(ivoa_ucd) @model_validator(mode="after") def check_units(self) -> Column: @@ -551,6 +572,70 @@ def check_columns_or_expressions(cls, values: dict[str, Any]) -> dict[str, Any]: """Type alias for a constraint type.""" +ColumnRef: TypeAlias = str +"""Type alias for a column reference.""" + + +class ColumnGroup(BaseObject): + """Column group model.""" + + columns: list[ColumnRef | Column] = Field(..., min_length=1) + """Columns in the group.""" + + ivoa_ucd: str | None = Field(None, alias="ivoa:ucd") + """IVOA UCD of the column.""" + + table: Table | None = None + """Reference to the parent table.""" + + @field_validator("ivoa_ucd") + @classmethod + def check_ivoa_ucd(cls, ivoa_ucd: str) -> str: + """Check that IVOA UCD values are valid. + + Parameters + ---------- + ivoa_ucd + IVOA UCD value to check. + + Returns + ------- + `str` + The IVOA UCD value if it is valid. + """ + return validate_ivoa_ucd(ivoa_ucd) + + @model_validator(mode="after") + def check_unique_columns(self) -> ColumnGroup: + """Check that the columns list contains unique items. + + Returns + ------- + `ColumnGroup` + The column group being validated. + """ + column_ids = [col if isinstance(col, str) else col.id for col in self.columns] + if len(column_ids) != len(set(column_ids)): + raise ValueError("Columns in the group must be unique") + return self + + def _dereference_columns(self) -> None: + """Dereference ColumnRef to Column objects.""" + if self.table is None: + raise ValueError("ColumnGroup must have a reference to its parent table") + + dereferenced_columns: list[ColumnRef | Column] = [] + for col in self.columns: + if isinstance(col, str): + # Dereference ColumnRef to Column object + col_obj = self.table._find_column_by_id(col) + dereferenced_columns.append(col_obj) + else: + dereferenced_columns.append(col) + + self.columns = dereferenced_columns + + class Table(BaseObject): """Table model.""" @@ -563,6 +648,9 @@ class Table(BaseObject): indexes: list[Index] = Field(default_factory=list) """Indexes on the table.""" + column_groups: list[ColumnGroup] = Field(default_factory=list, alias="columnGroups") + """Column groups in the table.""" + primary_key: str | list[str] | None = Field(None, alias="primaryKey") """Primary key of the table.""" @@ -653,6 +741,43 @@ def check_tap_principal(self, info: ValidationInfo) -> Table: return self raise ValueError(f"Table '{self.name}' is missing at least one column designated as 'tap:principal'") + def _find_column_by_id(self, id: str) -> Column: + """Find a column by ID. + + Parameters + ---------- + id + The ID of the column to find. + + Returns + ------- + `Column` + The column with the given ID. + + Raises + ------ + ValueError + Raised if the column is not found. + """ + for column in self.columns: + if column.id == id: + return column + raise ValueError(f"Column '{id}' not found in table '{self.name}'") + + @model_validator(mode="after") + def dereference_column_groups(self: Table) -> Table: + """Dereference columns in column groups. + + Returns + ------- + `Table` + The table with dereferenced column groups. + """ + for group in self.column_groups: + group.table = self + group._dereference_columns() + return self + class SchemaVersion(BaseModel): """Schema version model.""" diff --git a/tests/data/sales.yaml b/tests/data/sales.yaml index 6850a879..ced19e09 100644 --- a/tests/data/sales.yaml +++ b/tests/data/sales.yaml @@ -22,6 +22,14 @@ tables: datatype: string description: Customer address length: 100 + columnGroups: + - name: customer_info + "@id": "#customers.customer_info" + description: Customer information + ivoa:ucd: meta + columns: + - "#customers.name" + - "#customers.address" indexes: - name: idx_name "@id": "#customers_idx_name" diff --git a/tests/test_datamodel.py b/tests/test_datamodel.py index 304498d4..c95be929 100644 --- a/tests/test_datamodel.py +++ b/tests/test_datamodel.py @@ -31,6 +31,7 @@ from felis.datamodel import ( CheckConstraint, Column, + ColumnGroup, Constraint, DataType, ForeignKeyConstraint, @@ -260,6 +261,76 @@ def test_validation(self) -> None: Table(name="testTable", id="#test_id", columns=[testCol, testCol]) +class ColumnGroupTestCase(unittest.TestCase): + """Test Pydantic validation of the ``ColumnGroup`` class.""" + + def test_validation(self) -> None: + """Test Pydantic validation of the ``ColumnGroup`` class.""" + # Default initialization should throw an exception. + with self.assertRaises(ValidationError): + ColumnGroup() + + # Setting only name should throw an exception. + with self.assertRaises(ValidationError): + ColumnGroup(name="testGroup") + + # Setting name and id should throw an exception from missing columns. + with self.assertRaises(ValidationError): + ColumnGroup(name="testGroup", id="#test_id") + + col = Column(name="testColumn", id="#test_col", datatype="string", length=256) + + # Setting name, id, and columns should not throw an exception and + # should load data correctly. + group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta") + self.assertEqual(group.name, "testGroup", "name should be 'testGroup'") + self.assertEqual(group.id, "#test_group", "id should be '#test_group'") + self.assertEqual(group.columns, [col], "columns should be ['testColumn']") + + # Dereferencing columns without setting a table should raise an + # exception. + with self.assertRaises(ValueError): + group._dereference_columns() + + # Creating a group with duplicate column names should raise an + # exception. + with self.assertRaises(ValidationError): + ColumnGroup(name="testGroup", id="#test_group", columns=[col, col]) + + # Check that including a column object in a group works correctly. + group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta") + table = Table( + name="testTable", + id="#test_table", + columns=[col], + column_groups=[group], + ) + self.assertEqual(table.column_groups, [group], "column_groups should be [group]") + self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol") + + # Check that column derefencing works correctly when group is assigned + # to a table. + group = ColumnGroup(name="testGroup", id="#test_group", columns=["#test_col"], ivoa_ucd="meta") + table = Table( + name="testTable", + id="#test_table", + columns=[col], + column_groups=[group], + ) + self.assertEqual(table.column_groups, [group], "column_groups should be [group]") + self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol") + + # Creating a group with a bad column should raise an exception. + group = ColumnGroup(name="testGroup", id="#test_group", columns=["#bad_col"], ivoa_ucd="meta") + with self.assertRaises(ValueError): + table = Table( + name="testTable", + id="#test_table", + columns=[col], + column_groups=[group], + ) + + class ConstraintTestCase(unittest.TestCase): """Test Pydantic validation of the different constraint classes.""" From 7a6500376edfc901ddf08e7437cd152322bc3c68 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 15 Jan 2025 13:09:11 -0600 Subject: [PATCH 2/7] Add install target --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index d389bf63..87e5af85 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,8 @@ build: deps: @uv pip install --upgrade -r requirements.txt +install: deps build + docs: @rm -rf docs/dev/internals docs/_build @tox -e docs From 7ea2fa628fe43f196835f26a908481a7194630ad Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 15 Jan 2025 13:55:59 -0600 Subject: [PATCH 3/7] Add news fragment --- docs/changes/DM-48427.feature.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/changes/DM-48427.feature.rst diff --git a/docs/changes/DM-48427.feature.rst b/docs/changes/DM-48427.feature.rst new file mode 100644 index 00000000..bee06ed8 --- /dev/null +++ b/docs/changes/DM-48427.feature.rst @@ -0,0 +1,3 @@ +Column grouping functionality was added to the data model. +These are sets of related columns that, in addition to the standard object attributes, may have an `ivoa:ucd`. +Additional information on column groups was added to the User Guide. From d07a0308041b3b76566a7c7353f5699c1b9dac87 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Wed, 15 Jan 2025 18:56:05 -0600 Subject: [PATCH 4/7] Add documentation on column groups --- docs/user-guide/model.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/user-guide/model.rst b/docs/user-guide/model.rst index 8cbe9900..0494ab92 100644 --- a/docs/user-guide/model.rst +++ b/docs/user-guide/model.rst @@ -105,6 +105,18 @@ A column may also have the following optional properties: .. [2] `TAP Access Protocol (TAP) specification `__ .. [3] `VOTable specification `__ +************* +Column Groups +************* + +A `column group <../dev/internals/felis.datamodel.Schema.html#felis.datamodel.ColumnGroup>`__ represents a set of related columns in a table. +In addition to the standard column attributes, column groups have the following attributes: + +:``ivoa:ucd``: The `IVOA UCD `__ for this column group. +:``columns``: The list of columns in this column group, which should be IDs of columns in the table. This is a required field. + +The functionality of column groups is currently limited but may be expanded in future versions of Felis, in particular to support VOTable ``GROUP`` elements. + .. _Constraint: ********** From 345f6b6b5640f85100352bbd6ca9bd0af4879522 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Mon, 27 Jan 2025 12:24:20 -0600 Subject: [PATCH 5/7] Add target for running tests quietly --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 87e5af85..231b0a1e 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ help: @echo " docs - Generate the documentation" @echo " check - Run pre-commit checks" @echo " test - Run tests" + @echo " testq - Run tests quietly" @echo " numpydoc - Check numpydoc style" @echo " mypy - Run mypy static type checker" @echo " all - Run all tasks" @@ -36,6 +37,9 @@ check: test: @pytest -s --log-level DEBUG +testq: + @pytest -q + numpydoc: @python -m numpydoc.hooks.validate_docstrings $(shell find python -name "*.py" ! -name "cli.py") From 7c8667572588cf1814a9eb00f3f582fe5828caf9 Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Mon, 27 Jan 2025 18:12:58 -0600 Subject: [PATCH 6/7] Remove line ending --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 213b2341..7b09abf0 100644 --- a/.gitignore +++ b/.gitignore @@ -136,4 +136,3 @@ python/*.dist-info/ # VS Code workspace dir .vscode - From da3a7f244602c18563e214d02588ad7379fe805a Mon Sep 17 00:00:00 2001 From: Jeremy McCormick Date: Mon, 27 Jan 2025 18:14:04 -0600 Subject: [PATCH 7/7] Add missing parameter MEtaDataBuilder class documentation --- python/felis/metadata.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/felis/metadata.py b/python/felis/metadata.py index ae73d361..5a45b244 100644 --- a/python/felis/metadata.py +++ b/python/felis/metadata.py @@ -127,6 +127,8 @@ class MetaDataBuilder: Whether to apply the schema name to the metadata object. ignore_constraints Whether to ignore constraints when building the metadata. + table_name_postfix + A string to append to the table names when building the metadata. """ def __init__(