Skip to content

Commit

Permalink
Merge pull request #127 from lsst/tickets/DM-48427
Browse files Browse the repository at this point in the history
DM-48427: Add column groups
  • Loading branch information
JeremyMcCormick authored Jan 28, 2025
2 parents 8294325 + da3a7f2 commit 9962b0d
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 7 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,4 +136,3 @@ python/*.dist-info/

# VS Code workspace dir
.vscode

6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ help:
@echo " docs - Generate the documentation"
@echo " check - Run pre-commit checks"
@echo " test - Run tests"
@echo " testq - Run tests quietly"
@echo " numpydoc - Check numpydoc style"
@echo " mypy - Run mypy static type checker"
@echo " all - Run all tasks"
Expand All @@ -24,6 +25,8 @@ build:
deps:
@uv pip install --upgrade -r requirements.txt

install: deps build

docs:
@rm -rf docs/dev/internals docs/_build
@tox -e docs
Expand All @@ -34,6 +37,9 @@ check:
test:
@pytest -s --log-level DEBUG

testq:
@pytest -q

numpydoc:
@python -m numpydoc.hooks.validate_docstrings $(shell find python -name "*.py" ! -name "cli.py")

Expand Down
3 changes: 3 additions & 0 deletions docs/changes/DM-48427.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Column grouping functionality was added to the data model.
These are sets of related columns that, in addition to the standard object attributes, may have an ``ivoa:ucd``.
Additional information on column groups was added to the User Guide.
12 changes: 12 additions & 0 deletions docs/user-guide/model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,18 @@ A column may also have the following optional properties:
.. [2] `Table Access Protocol (TAP) specification <https://www.ivoa.net/documents/TAP/>`__
.. [3] `VOTable specification <http://www.ivoa.net/documents/VOTable/>`__
*************
Column Groups
*************

A `column group <../dev/internals/felis.datamodel.ColumnGroup.html#felis.datamodel.ColumnGroup>`__ represents a set of related columns in a table.
In addition to the standard object attributes, column groups have the following attributes:

:``ivoa:ucd``: The `IVOA UCD <http://www.ivoa.net/documents/latest/UCD.html>`__ for this column group.
:``columns``: The list of columns in this column group, which should be IDs of columns in the table. This is a required field.

The functionality of column groups is currently limited but may be expanded in future versions of Felis, in particular to support VOTable ``GROUP`` elements.

.. _Constraint:

**********
Expand Down
137 changes: 131 additions & 6 deletions python/felis/datamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,32 @@ class DataType(StrEnum):
timestamp = auto()


def validate_ivoa_ucd(ivoa_ucd: str) -> str:
    """Validate IVOA UCD values.

    Parameters
    ----------
    ivoa_ucd
        IVOA UCD value to check. A value of `None` is returned unchanged
        without being parsed.

    Returns
    -------
    `str`
        The IVOA UCD value if it is valid.

    Raises
    ------
    ValueError
        If the IVOA UCD value is invalid.
    """
    # Optional fields reach this function as None; there is nothing to parse.
    if ivoa_ucd is None:
        return ivoa_ucd
    try:
        ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
    except ValueError as e:
        # Chain explicitly so the parser's error is preserved as the cause.
        raise ValueError(f"Invalid IVOA UCD: {e}") from e
    return ivoa_ucd


class Column(BaseObject):
"""Column model."""

Expand Down Expand Up @@ -235,12 +261,7 @@ def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
`str`
The IVOA UCD value if it is valid.
"""
if ivoa_ucd is not None:
try:
ucd.parse_ucd(ivoa_ucd, check_controlled_vocabulary=True, has_colon=";" in ivoa_ucd)
except ValueError as e:
raise ValueError(f"Invalid IVOA UCD: {e}")
return ivoa_ucd
return validate_ivoa_ucd(ivoa_ucd)

@model_validator(mode="after")
def check_units(self) -> Column:
Expand Down Expand Up @@ -551,6 +572,70 @@ def check_columns_or_expressions(cls, values: dict[str, Any]) -> dict[str, Any]:
"""Type alias for a constraint type."""


ColumnRef: TypeAlias = str
"""Type alias for a column reference."""


class ColumnGroup(BaseObject):
    """Column group model.

    A column group is a named set of columns belonging to one table. The
    columns may be given either as `Column` objects or as column ID
    strings; ID strings are replaced by the matching `Column` objects when
    `_dereference_columns` is called with the parent table set.
    """

    columns: list[ColumnRef | Column] = Field(..., min_length=1)
    """Columns in the group."""

    ivoa_ucd: str | None = Field(None, alias="ivoa:ucd")
    """IVOA UCD of the column group."""

    # The parent table holds this group in its ``column_groups`` list, so this
    # back-reference forms a cycle. It is excluded from serialization so that
    # dumping a table does not walk Table -> ColumnGroup -> Table.
    table: Table | None = Field(None, exclude=True)
    """Reference to the parent table."""

    @field_validator("ivoa_ucd")
    @classmethod
    def check_ivoa_ucd(cls, ivoa_ucd: str) -> str:
        """Check that IVOA UCD values are valid.

        Parameters
        ----------
        ivoa_ucd
            IVOA UCD value to check.

        Returns
        -------
        `str`
            The IVOA UCD value if it is valid.
        """
        return validate_ivoa_ucd(ivoa_ucd)

    @model_validator(mode="after")
    def check_unique_columns(self) -> ColumnGroup:
        """Check that the columns list contains unique items.

        Returns
        -------
        `ColumnGroup`
            The column group being validated.

        Raises
        ------
        ValueError
            Raised if the same column appears more than once in the group.
        """
        # Entries may be ID strings or Column objects; compare them by ID.
        column_ids = [col if isinstance(col, str) else col.id for col in self.columns]
        if len(column_ids) != len(set(column_ids)):
            raise ValueError("Columns in the group must be unique")
        return self

    def _dereference_columns(self) -> None:
        """Dereference ColumnRef to Column objects.

        Raises
        ------
        ValueError
            Raised if the group has no parent table, or if a referenced
            column ID is not found in the parent table.
        """
        table = self.table
        if table is None:
            raise ValueError("ColumnGroup must have a reference to its parent table")

        # Replace ID strings with the table's Column objects; entries that
        # are already Column objects are kept as they are.
        dereferenced_columns: list[ColumnRef | Column] = [
            table._find_column_by_id(col) if isinstance(col, str) else col for col in self.columns
        ]
        self.columns = dereferenced_columns


class Table(BaseObject):
"""Table model."""

Expand All @@ -563,6 +648,9 @@ class Table(BaseObject):
indexes: list[Index] = Field(default_factory=list)
"""Indexes on the table."""

column_groups: list[ColumnGroup] = Field(default_factory=list, alias="columnGroups")
"""Column groups in the table."""

primary_key: str | list[str] | None = Field(None, alias="primaryKey")
"""Primary key of the table."""

Expand Down Expand Up @@ -653,6 +741,43 @@ def check_tap_principal(self, info: ValidationInfo) -> Table:
return self
raise ValueError(f"Table '{self.name}' is missing at least one column designated as 'tap:principal'")

def _find_column_by_id(self, id: str) -> Column:
    """Look up one of this table's columns by its ID.

    Parameters
    ----------
    id
        The ID of the column to find.

    Returns
    -------
    `Column`
        The first column in the table whose ID equals ``id``.

    Raises
    ------
    ValueError
        Raised if no column in the table has the given ID.
    """
    match = next((candidate for candidate in self.columns if candidate.id == id), None)
    if match is None:
        raise ValueError(f"Column '{id}' not found in table '{self.name}'")
    return match

@model_validator(mode="after")
def dereference_column_groups(self: Table) -> Table:
    """Attach each column group to this table and resolve its columns.

    Every group in ``column_groups`` gets this table as its parent and is
    then asked to dereference its columns.

    Returns
    -------
    `Table`
        The table with dereferenced column groups.
    """
    for column_group in self.column_groups:
        # The parent must be set first; dereferencing looks columns up on it.
        column_group.table = self
        column_group._dereference_columns()
    return self


class SchemaVersion(BaseModel):
"""Schema version model."""
Expand Down
2 changes: 2 additions & 0 deletions python/felis/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ class MetaDataBuilder:
Whether to apply the schema name to the metadata object.
ignore_constraints
Whether to ignore constraints when building the metadata.
table_name_postfix
A string to append to the table names when building the metadata.
"""

def __init__(
Expand Down
8 changes: 8 additions & 0 deletions tests/data/sales.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ tables:
datatype: string
description: Customer address
length: 100
columnGroups:
- name: customer_info
"@id": "#customers.customer_info"
description: Customer information
ivoa:ucd: meta
columns:
- "#customers.name"
- "#customers.address"
indexes:
- name: idx_name
"@id": "#customers_idx_name"
Expand Down
71 changes: 71 additions & 0 deletions tests/test_datamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from felis.datamodel import (
CheckConstraint,
Column,
ColumnGroup,
Constraint,
DataType,
ForeignKeyConstraint,
Expand Down Expand Up @@ -260,6 +261,76 @@ def test_validation(self) -> None:
Table(name="testTable", id="#test_id", columns=[testCol, testCol])


class ColumnGroupTestCase(unittest.TestCase):
    """Test Pydantic validation of the ``ColumnGroup`` class."""

    def setUp(self) -> None:
        """Create a fresh column for each test."""
        self.col = Column(name="testColumn", id="#test_col", datatype="string", length=256)

    def _make_table(self, group: ColumnGroup) -> Table:
        """Build a table containing the test column and the given group."""
        return Table(
            name="testTable",
            id="#test_table",
            columns=[self.col],
            column_groups=[group],
        )

    def test_validation(self) -> None:
        """Test required fields and basic loading of a ``ColumnGroup``."""
        # Default initialization should throw an exception.
        with self.assertRaises(ValidationError):
            ColumnGroup()

        # Setting only name should throw an exception.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup")

        # Setting name and id should throw an exception from missing columns.
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_id")

        # Setting name, id, and columns should not throw an exception and
        # should load data correctly.
        group = ColumnGroup(name="testGroup", id="#test_group", columns=[self.col], ivoa_ucd="meta")
        self.assertEqual(group.name, "testGroup", "name should be 'testGroup'")
        self.assertEqual(group.id, "#test_group", "id should be '#test_group'")
        self.assertEqual(group.columns, [self.col], "columns should contain only the test column")

    def test_dereference_without_table(self) -> None:
        """Test that dereferencing requires a parent table."""
        group = ColumnGroup(name="testGroup", id="#test_group", columns=[self.col], ivoa_ucd="meta")
        with self.assertRaises(ValueError):
            group._dereference_columns()

    def test_duplicate_columns(self) -> None:
        """Test that a group rejects duplicate columns."""
        with self.assertRaises(ValidationError):
            ColumnGroup(name="testGroup", id="#test_group", columns=[self.col, self.col])

    def test_column_object_in_group(self) -> None:
        """Test that including a column object in a group works correctly."""
        group = ColumnGroup(name="testGroup", id="#test_group", columns=[self.col], ivoa_ucd="meta")
        table = self._make_table(group)
        self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
        self.assertEqual(
            self.col,
            table.column_groups[0].columns[0],
            "first column of the group should be the test column",
        )

    def test_column_dereferencing(self) -> None:
        """Test that column IDs are dereferenced when a group joins a table."""
        group = ColumnGroup(name="testGroup", id="#test_group", columns=["#test_col"], ivoa_ucd="meta")
        table = self._make_table(group)
        self.assertEqual(table.column_groups, [group], "column_groups should be [group]")
        self.assertEqual(
            self.col,
            table.column_groups[0].columns[0],
            "column ID should be dereferenced to the test column",
        )

    def test_bad_column_reference(self) -> None:
        """Test that a group referencing an unknown column ID is rejected."""
        group = ColumnGroup(name="testGroup", id="#test_group", columns=["#bad_col"], ivoa_ucd="meta")
        with self.assertRaises(ValueError):
            self._make_table(group)


class ConstraintTestCase(unittest.TestCase):
"""Test Pydantic validation of the different constraint classes."""

Expand Down

0 comments on commit 9962b0d

Please sign in to comment.