diff --git a/.gitignore b/.gitignore
index 213b2341..7b09abf0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -136,4 +136,3 @@ python/*.dist-info/
# VS Code workspace dir
.vscode
-
diff --git a/Makefile b/Makefile
index d389bf63..231b0a1e 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@ help:
@echo " docs - Generate the documentation"
@echo " check - Run pre-commit checks"
@echo " test - Run tests"
+ @echo " testq - Run tests quietly"
@echo " numpydoc - Check numpydoc style"
@echo " mypy - Run mypy static type checker"
@echo " all - Run all tasks"
@@ -24,6 +25,8 @@ build:
deps:
@uv pip install --upgrade -r requirements.txt
+install: deps build
+
docs:
@rm -rf docs/dev/internals docs/_build
@tox -e docs
@@ -34,6 +37,9 @@ check:
test:
@pytest -s --log-level DEBUG
+testq:
+ @pytest -q
+
numpydoc:
@python -m numpydoc.hooks.validate_docstrings $(shell find python -name "*.py" ! -name "cli.py")
diff --git a/docs/changes/DM-48427.feature.rst b/docs/changes/DM-48427.feature.rst
new file mode 100644
index 00000000..bee06ed8
--- /dev/null
+++ b/docs/changes/DM-48427.feature.rst
@@ -0,0 +1,3 @@
+Column grouping functionality was added to the data model.
+These are sets of related columns that, in addition to the standard object attributes, may have an `ivoa:ucd`.
+Additional information on column groups was added to the User Guide.
diff --git a/docs/user-guide/model.rst b/docs/user-guide/model.rst
index 8cbe9900..0494ab92 100644
--- a/docs/user-guide/model.rst
+++ b/docs/user-guide/model.rst
@@ -105,6 +105,18 @@ A column may also have the following optional properties:
.. [2] `TAP Access Protocol (TAP) specification `__
.. [3] `VOTable specification `__
+*************
+Column Groups
+*************
+
+A `column group <../dev/internals/felis.datamodel.Schema.html#felis.datamodel.ColumnGroup>`__ represents a set of related columns in a table.
+In addition to the standard column attributes, column groups have the following attributes:
+
+:``ivoa:ucd``: The `IVOA UCD `__ for this column group.
+:``columns``: The list of columns in this column group, which should be IDs of columns in the table. This is a required field.
+
+The functionality of column groups is currently limited but may be expanded in future versions of Felis, in particular to support VOTable ``GROUP`` elements.
+
.. _Constraint:
**********
diff --git a/python/felis/datamodel.py b/python/felis/datamodel.py
index 53ccbf2c..25f3dc8a 100644
--- a/python/felis/datamodel.py
+++ b/python/felis/datamodel.py
@@ -134,6 +134,32 @@ class DataType(StrEnum):
timestamp = auto()
+def validate_ivoa_ucd(ivoa_ucd: str) -> str:
+ """Validate IVOA UCD values.
+
+ Parameters
+ ----------
+ ivoa_ucd
+ IVOA UCD value to check.
+
+ Returns
+ -------
+ `str`
+ The IVOA UCD value if it is valid.
+
+ Raises
+ ------
+ ValueError
+ If the IVOA UCD value is invalid.
+ """
+ if ivoa_ucd is not None:
+ try:
+ ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
+ except ValueError as e:
+ raise ValueError(f"Invalid IVOA UCD: {e}")
+ return ivoa_ucd
+
+
class Column(BaseObject):
"""Column model."""
@@ -235,12 +261,7 @@ def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
`str`
The IVOA UCD value if it is valid.
"""
- if ivoa_ucd is not None:
- try:
- ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
- except ValueError as e:
- raise ValueError(f"Invalid IVOA UCD: {e}")
- return ivoa_ucd
+ return validate_ivoa_ucd(ivoa_ucd)
@model_validator(mode="after")
def check_units(self) -> Column:
@@ -551,6 +572,70 @@ def check_columns_or_expressions(cls, values: dict[str, Any]) -> dict[str, Any]:
"""Type alias for a constraint type."""
+ColumnRef: TypeAlias = str
+"""Type alias for a column reference."""
+
+
+class ColumnGroup(BaseObject):
+ """Column group model."""
+
+ columns: list[ColumnRef | Column] = Field(..., min_length=1)
+ """Columns in the group."""
+
+ ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
+ """IVOA UCD of the column."""
+
+ table: Table | None = None
+ """Reference to the parent table."""
+
+ @field_validator("ivoa_ucd")
+ @classmethod
+ def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
+ """Check that IVOA UCD values are valid.
+
+ Parameters
+ ----------
+ ivoa_ucd
+ IVOA UCD value to check.
+
+ Returns
+ -------
+ `str`
+ The IVOA UCD value if it is valid.
+ """
+ return validate_ivoa_ucd(ivoa_ucd)
+
+ @model_validator(mode="after")
+ def check_unique_columns(self) -> ColumnGroup:
+ """Check that the columns list contains unique items.
+
+ Returns
+ -------
+ `ColumnGroup`
+ The column group being validated.
+ """
+ column_ids = [col if isinstance(col, str) else col.id for col in self.columns]
+ if len(column_ids) != len(set(column_ids)):
+ raise ValueError("Columns in the group must be unique")
+ return self
+
+ def _dereference_columns(self) -> None:
+ """Dereference ColumnRef to Column objects."""
+ if self.table is None:
+ raise ValueError("ColumnGroup must have a reference to its parent table")
+
+ dereferenced_columns: list[ColumnRef | Column] = []
+ for col in self.columns:
+ if isinstance(col, str):
+ # Dereference ColumnRef to Column object
+ col_obj = self.table._find_column_by_id(col)
+ dereferenced_columns.append(col_obj)
+ else:
+ dereferenced_columns.append(col)
+
+ self.columns = dereferenced_columns
+
+
class Table(BaseObject):
"""Table model."""
@@ -563,6 +648,9 @@ class Table(BaseObject):
indexes: list[Index] = Field(default_factory=list)
"""Indexes on the table."""
+ column_groups: list[ColumnGroup] = Field(default_factory=list, alias="columnGroups")
+ """Column groups in the table."""
+
primary_key: str | list[str] | None = Field(None, alias="primaryKey")
"""Primary key of the table."""
@@ -653,6 +741,43 @@ def check_tap_principal(self, info: ValidationInfo) -> Table:
return self
raise ValueError(f"Table '{self.name}' is missing at least one column designated as 'tap:principal'")
+ def _find_column_by_id(self, id: str) -> Column:
+ """Find a column by ID.
+
+ Parameters
+ ----------
+ id
+ The ID of the column to find.
+
+ Returns
+ -------
+ `Column`
+ The column with the given ID.
+
+ Raises
+ ------
+ ValueError
+ Raised if the column is not found.
+ """
+ for column in self.columns:
+ if column.id == id:
+ return column
+ raise ValueError(f"Column '{id}' not found in table '{self.name}'")
+
+ @model_validator(mode="after")
+ def dereference_column_groups(self: Table) -> Table:
+ """Dereference columns in column groups.
+
+ Returns
+ -------
+ `Table`
+ The table with dereferenced column groups.
+ """
+ for group in self.column_groups:
+ group.table = self
+ group._dereference_columns()
+ return self
+
class SchemaVersion(BaseModel):
"""Schema version model."""
diff --git a/python/felis/metadata.py b/python/felis/metadata.py
index ae73d361..5a45b244 100644
--- a/python/felis/metadata.py
+++ b/python/felis/metadata.py
@@ -127,6 +127,8 @@ class MetaDataBuilder:
Whether to apply the schema name to the metadata object.
ignore_constraints
Whether to ignore constraints when building the metadata.
+ table_name_postfix
+ A string to append to the table names when building the metadata.
"""
def __init__(
diff --git a/tests/data/sales.yaml b/tests/data/sales.yaml
index 6850a879..ced19e09 100644
--- a/tests/data/sales.yaml
+++ b/tests/data/sales.yaml
@@ -22,6 +22,14 @@ tables:
datatype: string
description: Customer address
length: 100
+ columnGroups:
+ - name: customer_info
+ "@id": "#customers.customer_info"
+ description: Customer information
+ ivoa:ucd: meta
+ columns:
+ - "#customers.name"
+ - "#customers.address"
indexes:
- name: idx_name
"@id": "#customers_idx_name"
diff --git a/tests/test_datamodel.py b/tests/test_datamodel.py
index 304498d4..c95be929 100644
--- a/tests/test_datamodel.py
+++ b/tests/test_datamodel.py
@@ -31,6 +31,7 @@
from felis.datamodel import (
CheckConstraint,
Column,
+ ColumnGroup,
Constraint,
DataType,
ForeignKeyConstraint,
@@ -260,6 +261,76 @@ def test_validation(self) -> None:
Table(name="testTable", id="#test_id", columns=[testCol, testCol])
+class ColumnGroupTestCase(unittest.TestCase):
+ """Test Pydantic validation of the ``ColumnGroup`` class."""
+
+ def test_validation(self) -> None:
+ """Test Pydantic validation of the ``ColumnGroup`` class."""
+ # Default initialization should throw an exception.
+ with self.assertRaises(ValidationError):
+ ColumnGroup()
+
+ # Setting only name should throw an exception.
+ with self.assertRaises(ValidationError):
+ ColumnGroup(name="testGroup")
+
+ # Setting name and id should throw an exception from missing columns.
+ with self.assertRaises(ValidationError):
+ ColumnGroup(name="testGroup", id="#test_id")
+
+ col = Column(name="testColumn", id="#test_col", datatype="string", length=256)
+
+ # Setting name, id, and columns should not throw an exception and
+ # should load data correctly.
+ group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta")
+ self.assertEqual(group.name, "testGroup", "name should be 'testGroup'")
+ self.assertEqual(group.id, "#test_group", "id should be '#test_group'")
+ self.assertEqual(group.columns, [col], "columns should be ['testColumn']")
+
+ # Dereferencing columns without setting a table should raise an
+ # exception.
+ with self.assertRaises(ValueError):
+ group._dereference_columns()
+
+ # Creating a group with duplicate column names should raise an
+ # exception.
+ with self.assertRaises(ValidationError):
+ ColumnGroup(name="testGroup", id="#test_group", columns=[col, col])
+
+ # Check that including a column object in a group works correctly.
+ group = ColumnGroup(name="testGroup", id="#test_group", columns=[col], ivoa_ucd="meta")
+ table = Table(
+ name="testTable",
+ id="#test_table",
+ columns=[col],
+ column_groups=[group],
+ )
+ self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
+ self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol")
+
+ # Check that column derefencing works correctly when group is assigned
+ # to a table.
+ group = ColumnGroup(name="testGroup", id="#test_group", columns=["#test_col"], ivoa_ucd="meta")
+ table = Table(
+ name="testTable",
+ id="#test_table",
+ columns=[col],
+ column_groups=[group],
+ )
+ self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
+ self.assertEqual(col, table.column_groups[0].columns[0], "column_groups[0] should be testCol")
+
+ # Creating a group with a bad column should raise an exception.
+ group = ColumnGroup(name="testGroup", id="#test_group", columns=["#bad_col"], ivoa_ucd="meta")
+ with self.assertRaises(ValueError):
+ table = Table(
+ name="testTable",
+ id="#test_table",
+ columns=[col],
+ column_groups=[group],
+ )
+
+
class ConstraintTestCase(unittest.TestCase):
"""Test Pydantic validation of the different constraint classes."""