From 58580ee0b01643bc0e82099f1df77bcdb7c198f7 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 19 Mar 2024 12:36:12 -0700 Subject: [PATCH] Add migration script for universe 5 -> 6 Add a script for upgrading the dimension universe config to match the daf_butler dimensions.yaml changes in DM-42636. --- .../migrations/dimensions-config.rst | 15 + migrations/dimensions-config/1fae088c80b6.py | 316 ++++++++++++++++++ mypy.ini | 3 + .../butler_migrate/_dimensions_json_utils.py | 80 +++++ .../daf/butler_migrate/butler_attributes.py | 59 +++- python/lsst/daf/butler_migrate/naming.py | 32 +- python/lsst/daf/butler_migrate/timespan.py | 89 +++++ tests/test_dimensions_json.py | 15 +- tests/test_dimensions_json_utils.py | 59 ++++ 9 files changed, 661 insertions(+), 7 deletions(-) create mode 100644 migrations/dimensions-config/1fae088c80b6.py create mode 100644 python/lsst/daf/butler_migrate/_dimensions_json_utils.py create mode 100644 python/lsst/daf/butler_migrate/timespan.py create mode 100644 tests/test_dimensions_json_utils.py diff --git a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst index 4ca110a..779e08a 100644 --- a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst +++ b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst @@ -71,3 +71,18 @@ Migration script: `2a8a32e1bec3.py `_ + +Supports group and day_obs as dimensions. + +- Add ``group`` table, and populate it based on the ``group_name`` field in the ``exposure`` table. +- Add ``day_obs`` table, and populate based on the ``day_obs`` field from the + ``exposure`` table and timespan offsets from Butler ``Instrument`` classes. +- Rename ``group_name`` in the exposure table to ``group``. +- Update the ``exposure`` table so ``group`` and ``day_obs`` are foreign keys to the new tables. +- Remove ``group_id`` from ``exposure`` table. +- Update ``config:dimensions.json`` to universe 6. \ No newline at end of file diff --git a/migrations/dimensions-config/1fae088c80b6.py b/migrations/dimensions-config/1fae088c80b6.py new file mode 100644 index 0000000..17910cc --- /dev/null +++ b/migrations/dimensions-config/1fae088c80b6.py @@ -0,0 +1,316 @@ +"""Migration script for dimensions.yaml namespace=daf_butler version=6. + +Revision ID: 1fae088c80b6 +Revises: 2a8a32e1bec3 +Create Date: 2024-03-12 14:35:38.888572 + +""" + +from __future__ import annotations + +import logging +from collections.abc import Iterator +from typing import Any, TypeAlias + +import alembic +import sqlalchemy as sa +from alembic import op +from lsst.daf.butler import Timespan +from lsst.daf.butler_migrate.butler_attributes import ButlerAttributes +from lsst.daf.butler_migrate.naming import make_string_length_constraint +from lsst.daf.butler_migrate.timespan import create_timespan_column_definitions, format_timespan_value +from lsst.utils import doImportType + +# revision identifiers, used by Alembic. +revision = "1fae088c80b6" +down_revision = "2a8a32e1bec3" +branch_labels = None +depends_on = None + +# Logger name should start with lsst to work with butler logging option. +_LOG = logging.getLogger(f"lsst.{__name__}") + + +def upgrade() -> None: + """Upgrade from version 5 to version 6 following update of dimensions.yaml in DM-42636. + + - Add ``group`` table, and populate it based on the ``group_name`` field in + the ``exposure`` table. 
+ - Add ``day_obs`` table, and populate based on the ``day_obs`` field from + the ``exposure`` table and timespan offsets from Butler ``Instrument`` + classes. + - Rename ``group_name`` in the exposure table to ``group``. + - Update the ``exposure`` table so ``group`` and ``day_obs`` are foreign + keys to the new tables. + - Remove ``group_id`` from ``exposure`` table. + - Update ``config:dimensions.json`` to universe 6. + """ + ctx = _Context() + _lock_exposure_table(ctx) + _validate_initial_dimension_universe(ctx) + _migrate_day_obs(ctx) + _migrate_groups(ctx) + _migrate_dimensions_json(ctx) + + +def downgrade() -> None: + """Perform schema downgrade.""" + raise NotImplementedError() + + +def _lock_exposure_table(ctx: _Context) -> None: + # In this migration we generate new tables based on the content of the + # exposure table, so make sure that it is not modified while we are + # working. + + if ctx.is_sqlite: + # Sqlite does not support table locks + return + + _LOG.info("Locking exposure table") + schema = "" + if ctx.schema: + schema = f"{ctx.schema}." + ctx.bind.execute(sa.text(f"LOCK TABLE {schema}exposure IN EXCLUSIVE MODE")) + + +def _validate_initial_dimension_universe(ctx: _Context) -> None: + config = ctx.mig_context.config + allow_mismatch = config is not None and "1" == config.get_section_option( + "daf_butler_migrate_options", "allow_dimension_universe_mismatch" + ) + if not allow_mismatch: + _LOG.info("Checking that this is an unmodified daf_butler universe 5 repo") + try: + ctx.attributes.validate_dimensions_json(5) + except ValueError as e: + e.add_note( + "Repositories originally created at dimension universe 1 or earlier may have incorrect" + " documentation strings.\n" + "Re-run butler migrate with the flag '--options allow_dimension_universe_mismatch=1' to" + " bypass this check.\n" + "This will overwrite any customizations made to the dimension universe." + ) + raise + + +def _migrate_groups(ctx: _Context) -> None: + # Create group table + _LOG.info("Creating group table") + check_constraints = [] + if ctx.is_sqlite: + check_constraints = [make_string_length_constraint("instrument", 32, "group_len_instrument")] + table = op.create_table( + "group", + sa.Column("instrument", sa.String(32), primary_key=True), + sa.Column("name", sa.Text, primary_key=True), + sa.schema.ForeignKeyConstraint( + columns=["instrument"], + refcolumns=[ctx.get_table("instrument").c.name], + name="fkey_group_instrument_name_instrument", + ), + *check_constraints, + schema=ctx.schema, + ) + + # Populate group table based on the data in the exposure table. + _LOG.info("Populating group table") + exposure_table = ctx.get_table("exposure") + select = sa.select( + exposure_table.columns["instrument"], + exposure_table.columns["group_name"], + ).distinct() + op.execute( + table.insert().from_select( + [ + "instrument", + "name", + ], + select, + ) + ) + + # Create index on instrument + _LOG.info("Creating instrument index for group table") + op.create_index( + "group_fkidx_instrument", + "group", + ["instrument"], + schema=ctx.schema, + ) + + # Update the exposure table to reference the group table. + _LOG.info("Updating exposure table to reference group table") + with op.batch_alter_table("exposure", schema=ctx.schema) as batch_op: + batch_op.alter_column("group_name", new_column_name="group", nullable=False) + batch_op.drop_column("group_id") + + # In theory we should do this create_foreign_key as part of the batch + # above. 
However, there is some undocumented weirdness with the column + # rename from "group_name" to "group". When done in the batch above, this + # foreign key only works if you specify the original column name instead of + # the final one. This seems fragile (and is likely incompatible with + # Postgres, which ignores the batching). So do it in a separate batch. + with op.batch_alter_table("exposure", schema=ctx.schema) as batch_op: + batch_op.create_foreign_key( + constraint_name="fkey_exposure_group_instrument_name_instrument_group", + referent_table="group", + local_cols=["instrument", "group"], + remote_cols=["instrument", "name"], + referent_schema=ctx.schema, + ) + + # Create index on exposure for group fkey + op.create_index( + "exposure_fkidx_instrument_group", + "exposure", + ["instrument", "group"], + schema=ctx.schema, + ) + + +def _migrate_day_obs(ctx: _Context) -> None: + # Before doing anything else, generate the rows for the new day_obs table + # from the data in the exposure table. This is prone to failure due to the + # need to import instrument classes. + _LOG.info("Generating data for day_obs table from exposure_table") + day_obs_rows = list(_generate_day_obs_rows(ctx)) + + # Create day_obs table + _LOG.info("Creating day_obs table") + check_constraints = [] + if ctx.is_sqlite: + check_constraints = [make_string_length_constraint("instrument", 32, "day_obs_len_instrument")] + + table = op.create_table( + "day_obs", + sa.Column("instrument", sa.String(32), primary_key=True), + sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=False), + *create_timespan_column_definitions("timespan", ctx.dialect), + sa.schema.ForeignKeyConstraint( + columns=["instrument"], + refcolumns=[ctx.get_table("instrument").c.name], + name="fkey_day_obs_instrument_name_instrument", + ), + *check_constraints, + schema=ctx.schema, + ) + + # Populate the day_obs table based on the data in the exposure table. + _LOG.info("Populating day_obs table") + op.bulk_insert(table, day_obs_rows) + + # Create index on instrument + _LOG.info("Creating instrument index for day_obs table") + op.create_index( + "day_obs_fkidx_instrument", + "day_obs", + ["instrument"], + schema=ctx.schema, + ) + + # Update exposure table to reference day_obs table + _LOG.info("Updating exposure table to reference day_obs table") + with op.batch_alter_table("exposure", schema=ctx.schema) as batch_op: + batch_op.alter_column("day_obs", nullable=False) + batch_op.create_foreign_key( + constraint_name="fkey_exposure_day_obs_instrument_id_instrument_day_obs", + referent_table="day_obs", + local_cols=["instrument", "day_obs"], + remote_cols=["instrument", "id"], + referent_schema=ctx.schema, + ) + + # Create index on exposure for day_obs fkey + op.create_index( + "exposure_fkidx_instrument_day_obs", + "exposure", + ["instrument", "day_obs"], + schema=ctx.schema, + ) + + +def _migrate_dimensions_json(ctx: _Context) -> None: + _LOG.info("Updating dimensions.json in ButlerAttributes") + ctx.attributes.replace_dimensions_json(6) + + +def _generate_day_obs_rows(ctx: _Context) -> Iterator[dict]: + exposure_table = ctx.get_table("exposure") + select = sa.select( + exposure_table.columns["instrument"], + exposure_table.columns["day_obs"], + ).distinct() + rows = ctx.bind.execute(select).all() + + instrument_fetcher = _InstrumentFetcher(ctx) + for row in rows: + day_obs = row.day_obs + + # Different instruments define the start and end times for day_obs differently. 
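+        # For example (hypothetical values): an instrument whose observing
+        # day runs noon-to-noon UTC rather than midnight-to-midnight would
+        # report a twelve-hour offset from its metadata translator, so
+        # _get_day_obs_offset() below would return 12 * 3600 and the
+        # timespan built by Timespan.from_day_obs would shift accordingly.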
+ instrument_name = row.instrument + instrument_class = instrument_fetcher.get_instrument(instrument_name) + offset = _get_day_obs_offset(instrument_name, instrument_class, day_obs) + + timespan = Timespan.from_day_obs(day_obs, offset) + yield { + "instrument": row.instrument, + "id": day_obs, + **format_timespan_value(timespan, "timespan", ctx.dialect), + } + + +def _get_day_obs_offset(instrument_name: str, instrument: _Instrument, day_obs: int) -> int: + day_as_astropy_time = Timespan.from_day_obs(day_obs, 0).begin + translator = instrument.translatorClass + if translator is None: + raise TypeError( + f"Instrument {instrument_name} does not have a translatorClass defined," + " cannot determine offset for day_obs." + ) + offset = translator.observing_date_to_offset(day_as_astropy_time) + # Convert astropy TimeDelta to integer seconds. + return round(offset.to_value("s")) + + +class _Context: + def __init__(self) -> None: + self.mig_context = alembic.context.get_context() + self.schema = self.mig_context.version_table_schema + bind = self.mig_context.bind + assert bind is not None, "Can't run offline -- need access to database to migrate data." + self.bind = bind + self.dialect = self.bind.dialect.name + self.is_sqlite = self.dialect == "sqlite" + self.metadata = sa.schema.MetaData(schema=self.schema) + self.attributes = ButlerAttributes(self.bind, self.schema) + + def get_table(self, table_name: str) -> sa.Table: + return sa.schema.Table(table_name, self.metadata, autoload_with=self.bind, schema=self.schema) + + +_Instrument: TypeAlias = Any +"""A dynamically loaded lsst.obs_base.Instrument.""" + + +class _InstrumentFetcher: + def __init__(self, ctx: _Context) -> None: + self._instruments: dict[str, _Instrument] = {} + self._ctx = ctx + + def get_instrument(self, name: str) -> _Instrument: + """Dynamically load an lsst.obs_base.Instrument based on its class name stored in the database.""" + instrument = self._instruments.get(name) + if instrument is not None: + return instrument + + instrument_table = self._ctx.get_table("instrument") + rows = self._ctx.bind.execute( + sa.select(instrument_table.c.class_name).where(instrument_table.c.name == name) + ).all() + assert len(rows) == 1, f"Should be exactly one class name for instrument {name}" + class_name = rows[0][0] + _LOG.info(f"Loading instrument definition {name} from class {class_name}") + instrument = doImportType(class_name)() + self._instruments[name] = instrument + return instrument diff --git a/mypy.ini b/mypy.ini index dfc246d..70507e0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -9,6 +9,9 @@ disallow_incomplete_defs = True [mypy-sqlalchemy.*] ignore_missing_imports = True +[mypy-yaml.*] +ignore_missing_imports = True + [mypy-astropy.*] ignore_missing_imports = True diff --git a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py new file mode 100644 index 0000000..6a888f5 --- /dev/null +++ b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py @@ -0,0 +1,80 @@ +# This file is part of daf_butler_migrate. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import difflib +import json + + +def load_historical_dimension_universe_json(universe_version: int) -> str: + """Load a specific version of the default dimension universe as JSON. + + Parameters + ---------- + universe_version : `int` + Version number of the universe to be loaded. + + Returns + ------- + universe : `str` + Dimension universe configuration encoded as a JSON string. + """ + import yaml + from lsst.resources import ResourcePath + + path = ResourcePath( + f"resource://lsst.daf.butler/configs/old_dimensions/daf_butler_universe{universe_version}.yaml" + ) + with path.open() as input: + dimensions = yaml.safe_load(input) + return json.dumps(dimensions) + + +def compare_json_strings(expected: str, actual: str) -> str | None: + """Compare two JSON strings and return a human-readable description of + the differences. + + Parameters + ---------- + expected : `str` + JSON-encoded string to use as the basis for comparison. + actual : `str` + JSON-encoded string to compare with the expected value. + + Returns + ------- + diff : `str` | `None` + If the two inputs parse as equivalent data, returns `None`. If there + are differences between the two inputs, returns a human-readable string + describing the differences. + """ + expected = _normalize_json_string(expected) + actual = _normalize_json_string(actual) + + if expected == actual: + return None + + diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") + return "\n".join(diff) + + +def _normalize_json_string(json_string: str) -> str: + # Re-encode a JSON string in a standardized format with sorted keys. + return json.dumps(json.loads(json_string), indent=2, sort_keys=True) diff --git a/python/lsst/daf/butler_migrate/butler_attributes.py b/python/lsst/daf/butler_migrate/butler_attributes.py index 5b3bd8f..7b4b619 100644 --- a/python/lsst/daf/butler_migrate/butler_attributes.py +++ b/python/lsst/daf/butler_migrate/butler_attributes.py @@ -27,6 +27,10 @@ import sqlalchemy +from ._dimensions_json_utils import compare_json_strings, load_historical_dimension_universe_json + +_DIMENSIONS_JSON_KEY = "config:dimensions.json" + class ButlerAttributes: """Helper class implementing updates for butler_attributes table. @@ -163,12 +167,15 @@ def get_dimensions_json(self) -> dict[str, Any]: config : `dict` Contents of ``dimensions.json`` as dictionary. """ - key = "config:dimensions.json" + config = json.loads(self._load_dimensions_json()) + return config + + def _load_dimensions_json(self) -> str: + key = _DIMENSIONS_JSON_KEY config_json = self.get(key) if config_json is None: raise LookupError(f"Key {key} does not exist in attributes table") - config = json.loads(config_json) - return config + return config_json def update_dimensions_json(self, update_config: Callable[[dict], dict]) -> None: """Update dimensions definitions in dimensions.json. 
@@ -179,7 +186,7 @@ def update_dimensions_json(self, update_config: Callable[[dict], dict]) -> None: A method that takes a dictionary representation of the ``dimensions.json`` and returns an updated dictionary. """ - key = "config:dimensions.json" + key = _DIMENSIONS_JSON_KEY config_json = self.get(key) if config_json is None: raise LookupError(f"Key {key} does not exist in attributes table") @@ -190,3 +197,47 @@ def update_dimensions_json(self, update_config: Callable[[dict], dict]) -> None: config_json = json.dumps(config) self.update(key, config_json) + + def validate_dimensions_json(self, expected_universe_version: int) -> None: + """ + Compare the dimensions.json definition stored in the attributes table + with the default daf_butler dimensions.json at a specific version, and + raise an exception if they do not match. + + Parameters + ---------- + expected_universe_version : `int` + Version number of the daf_butler universe that we expect to find in + the DB. + + Raises + ------ + ValueError + If the dimension universe stored in the database does not match the + expected value. + """ + expected_json = load_historical_dimension_universe_json(expected_universe_version) + actual_json = self._load_dimensions_json() + diff = compare_json_strings(expected_json, actual_json) + if diff is not None: + err = ValueError( + "dimensions.json stored in database does not match expected" + f" daf_butler universe version {expected_universe_version}." + ) + err.add_note(f"Differences:\n\n{diff}") + raise err + + return None + + def replace_dimensions_json(self, universe_version: int) -> None: + """Replace the dimensions.json definition stored in the attributes + table to match the default daf_butler dimensions.json at a specific + version. + + Parameters + ---------- + universe_version : `int` + Version number for the daf_butler universe to be saved in the DB. + """ + dimensions = load_historical_dimension_universe_json(universe_version) + self.update(_DIMENSIONS_JSON_KEY, dimensions) diff --git a/python/lsst/daf/butler_migrate/naming.py b/python/lsst/daf/butler_migrate/naming.py index 3bf7eea..c7d9d79 100644 --- a/python/lsst/daf/butler_migrate/naming.py +++ b/python/lsst/daf/butler_migrate/naming.py @@ -32,13 +32,13 @@ from typing import TYPE_CHECKING +import sqlalchemy + from .shrink import shrinkDatabaseEntityName if TYPE_CHECKING: from collections.abc import Iterable - import sqlalchemy - def primary_key_name(table: str, bind: sqlalchemy.engine.Connection) -> str: """Return name of a primary key constraint for a table. @@ -180,3 +180,31 @@ def is_foreign_key_index(table: str, index_name: str) -> bool: def is_regular_index(table: str, index_name: str) -> bool: return index_name.startswith(f"{table}_idx_") + + +def make_string_length_constraint( + column_name: str, max_length: int, constraint_name: str +) -> sqlalchemy.schema.CheckConstraint: + """Create a check constraint that guarantees a string column has a length + that is non-zero and less than a specified maximum. + + These constraints are used by Butler in sqlite databases to emulate + VARCHARs with a specific length. + + Parameters + ---------- + column_name : `str` + The name of the column to create the constraint on. + max_length : `int` + The maximum length allowed for strings stored in this column. + constraint_name : `str` + An arbitrary identifier for the constraint. + + Returns + ------- + check_constraint : `sqlalchemy.schema.CheckConstraint` + The generated check constraint. 
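+
+    Examples
+    --------
+    A minimal sketch of the generated SQL; the column and constraint names
+    here are illustrative only:
+
+    >>> cc = make_string_length_constraint("instrument", 32, "len_instrument")
+    >>> str(cc.sqltext)
+    'length("instrument")<=32 AND length("instrument")>=1'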
+ """ + return sqlalchemy.schema.CheckConstraint( + f'length("{column_name}")<={max_length} AND length("{column_name}")>=1', name=constraint_name + ) diff --git a/python/lsst/daf/butler_migrate/timespan.py b/python/lsst/daf/butler_migrate/timespan.py new file mode 100644 index 0000000..22aa5a7 --- /dev/null +++ b/python/lsst/daf/butler_migrate/timespan.py @@ -0,0 +1,89 @@ +# This file is part of daf_butler_migrate. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from typing import Any + +import sqlalchemy as sa +from lsst.daf.butler import Timespan +from sqlalchemy.dialects.postgresql import INT8RANGE, Range + + +def create_timespan_column_definitions(column_name: str, dialect: str) -> list[sa.Column]: + """Generate timespan column definitions for a given SQL dialect. + + Parameters + ---------- + column_name : `str` + The name of the column to generate, or the prefix if multiple columns + are generated. + dialect : `str` + The SQL dialect we are generating columns for (``sqlite`` or + ``postgres``). + + Returns + ------- + columns : `list` [ `sqlalchemy.Column` ] + SQLAlchemy column definitions. + """ + if dialect == "postgresql": + # Postgres uses a non-standard range datatype for representing + # timespans. + return [sa.Column(column_name, INT8RANGE)] + elif dialect == "sqlite": + return [ + sa.Column(f"{column_name}_begin", sa.BigInteger), + sa.Column(f"{column_name}_end", sa.BigInteger), + ] + else: + raise ValueError(f"Unhandled SQL dialect {dialect}") + + +def format_timespan_value(timespan: Timespan, column_name: str, dialect: str) -> dict[str, Any]: + """Format timespan values for insertion into a table using SQLAlchemy. + + Parameters + ---------- + timespan : `Timespan` + Value being formatted. + column_name : `str` + The name of the timespan column, or their prefix if the dialect uses + multiple columns. + dialect : `str` + The SQL dialect we are generating values for (``sqlite`` or + ``postgres``). + + Returns + ------- + values : `dict` [ `str`, `typing.Any` ] + Mapping from column name to value for that column. 
+ """ + nanoseconds = timespan.to_simple() + if dialect == "postgresql": + return {column_name: Range(*nanoseconds)} + elif dialect == "sqlite": + return { + f"{column_name}_begin": nanoseconds[0], + f"{column_name}_end": nanoseconds[1], + } + else: + raise ValueError(f"Unhandled SQL dialect {dialect}") diff --git a/tests/test_dimensions_json.py b/tests/test_dimensions_json.py index 5dbce4c..15ee3c2 100644 --- a/tests/test_dimensions_json.py +++ b/tests/test_dimensions_json.py @@ -27,7 +27,7 @@ from lsst.daf.butler.registry.sql_registry import SqlRegistry from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir from lsst.daf.butler.transfers import YamlRepoImportBackend -from lsst.daf.butler_migrate import database, migrate, script +from lsst.daf.butler_migrate import butler_attributes, database, migrate, script TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -200,6 +200,19 @@ def test_upgrade_v2(self) -> None: ], ) + def test_validate_dimensions_json(self) -> None: + self.make_butler_v0() + universe = 5 + with self.db.connect() as connection: + attribs = butler_attributes.ButlerAttributes(connection) + with self.assertRaisesRegex( + ValueError, "dimensions.json stored in database does not match expected" + ): + attribs.validate_dimensions_json(universe) + + attribs.replace_dimensions_json(universe) + self.assertIsNone(attribs.validate_dimensions_json(universe)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_dimensions_json_utils.py b/tests/test_dimensions_json_utils.py new file mode 100644 index 0000000..4106921 --- /dev/null +++ b/tests/test_dimensions_json_utils.py @@ -0,0 +1,59 @@ +# This file is part of daf_butler_migrate. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import json +import unittest + +from lsst.daf.butler_migrate._dimensions_json_utils import ( + compare_json_strings, + load_historical_dimension_universe_json, +) + + +class DimensionUtilsTestCase(unittest.TestCase): + """Test dimensions JSON utility functions.""" + + def test_universe_load(self) -> None: + self._check_universe_load(5) + self._check_universe_load(6) + + def _check_universe_load(self, version: int) -> None: + universe = load_historical_dimension_universe_json(version) + loaded_version_number = json.loads(universe)["version"] + self.assertEqual(loaded_version_number, version) + + def test_equal_json_strings(self) -> None: + a = '{ "a": {"b": 1, "c": 2}}' + b = '{ "a": {"c": 2, "b": 1}}' + self.assertIsNone(compare_json_strings(a, b)) + + def test_non_equal_json_strings(self) -> None: + a = '{ "a": 1 }' + b = '{ "a": {"c": 2, "b": 1}}' + diff = compare_json_strings(a, b) + self.assertEqual( + diff, + """--- \n+++ \n@@ -1,3 +1,6 @@\n {\n- "a": 1\n+ "a": {\n+ "b": 1,\n+ "c": 2\n+ }\n }""", + ) + + +if __name__ == "__main__": + unittest.main()
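
For reference, applying this migration to a repository is expected to look
roughly like the following. The repository path is a placeholder and the exact
positional syntax of ``butler migrate upgrade`` is an assumption here; the
``--options`` override comes from the error note in
``_validate_initial_dimension_universe``:

    butler migrate upgrade <repo> 1fae088c80b6
    butler migrate upgrade --options allow_dimension_universe_mismatch=1 <repo> 1fae088c80b6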