From 58580ee0b01643bc0e82099f1df77bcdb7c198f7 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 19 Mar 2024 12:36:12 -0700 Subject: [PATCH] Add migration script for universe 5 -> 6 Add a script for upgrading the dimension universe config to match the daf_butler dimensions.yaml changes in DM-42636. --- .../migrations/dimensions-config.rst | 15 + migrations/dimensions-config/1fae088c80b6.py | 316 ++++++++++++++++++ mypy.ini | 3 + .../butler_migrate/_dimensions_json_utils.py | 80 +++++ .../daf/butler_migrate/butler_attributes.py | 59 +++- python/lsst/daf/butler_migrate/naming.py | 32 +- python/lsst/daf/butler_migrate/timespan.py | 89 +++++ tests/test_dimensions_json.py | 15 +- tests/test_dimensions_json_utils.py | 59 ++++ 9 files changed, 661 insertions(+), 7 deletions(-) create mode 100644 migrations/dimensions-config/1fae088c80b6.py create mode 100644 python/lsst/daf/butler_migrate/_dimensions_json_utils.py create mode 100644 python/lsst/daf/butler_migrate/timespan.py create mode 100644 tests/test_dimensions_json_utils.py diff --git a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst index 4ca110a..779e08a 100644 --- a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst +++ b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst @@ -71,3 +71,18 @@ Migration script: `2a8a32e1bec3.py `_ + +Supports group and day_obs as dimensions. + +- Add ``group`` table, and populate it based on the ``group_name`` field in the ``exposure`` table. +- Add ``day_obs`` table, and populate based on the ``day_obs`` field from the + ``exposure`` table and timespan offsets from Butler ``Instrument`` classes. +- Rename ``group_name`` in the exposure table to ``group``. +- Update the ``exposure`` table so ``group`` and ``day_obs`` are foreign keys to the new tables. +- Remove ``group_id`` from ``exposure`` table. +- Update ``config:dimensions.json`` to universe 6. \ No newline at end of file diff --git a/migrations/dimensions-config/1fae088c80b6.py b/migrations/dimensions-config/1fae088c80b6.py new file mode 100644 index 0000000..17910cc --- /dev/null +++ b/migrations/dimensions-config/1fae088c80b6.py @@ -0,0 +1,316 @@ +"""Migration script for dimensions.yaml namespace=daf_butler version=6. + +Revision ID: 1fae088c80b6 +Revises: 2a8a32e1bec3 +Create Date: 2024-03-12 14:35:38.888572 + +""" + +from __future__ import annotations + +import logging +from collections.abc import Iterator +from typing import Any, TypeAlias + +import alembic +import sqlalchemy as sa +from alembic import op +from lsst.daf.butler import Timespan +from lsst.daf.butler_migrate.butler_attributes import ButlerAttributes +from lsst.daf.butler_migrate.naming import make_string_length_constraint +from lsst.daf.butler_migrate.timespan import create_timespan_column_definitions, format_timespan_value +from lsst.utils import doImportType + +# revision identifiers, used by Alembic. +revision = "1fae088c80b6" +down_revision = "2a8a32e1bec3" +branch_labels = None +depends_on = None + +# Logger name should start with lsst to work with butler logging option. +_LOG = logging.getLogger(f"lsst.{__name__}") + + +def upgrade() -> None: + """Upgrade from version 5 to version 6 following update of dimensions.yaml in DM-42636. + + - Add ``group`` table, and populate it based on the ``group_name`` field in + the ``exposure`` table. 
+ - Add ``day_obs`` table, and populate based on the ``day_obs`` field from + the ``exposure`` table and timespan offsets from Butler ``Instrument`` + classes. + - Rename ``group_name`` in the exposure table to ``group``. + - Update the ``exposure`` table so ``group`` and ``day_obs`` are foreign + keys to the new tables. + - Remove ``group_id`` from ``exposure`` table. + - Update ``config:dimensions.json`` to universe 6. + """ + ctx = _Context() + _lock_exposure_table(ctx) + _validate_initial_dimension_universe(ctx) + _migrate_day_obs(ctx) + _migrate_groups(ctx) + _migrate_dimensions_json(ctx) + + +def downgrade() -> None: + """Perform schema downgrade.""" + raise NotImplementedError() + + +def _lock_exposure_table(ctx: _Context) -> None: + # In this migration we generate new tables based on the content of the + # exposure table, so make sure that it is not modified while we are + # working. + + if ctx.is_sqlite: + # Sqlite does not support table locks + return + + _LOG.info("Locking exposure table") + schema = "" + if ctx.schema: + schema = f"{ctx.schema}." + ctx.bind.execute(sa.text(f"LOCK TABLE {schema}exposure IN EXCLUSIVE MODE")) + + +def _validate_initial_dimension_universe(ctx: _Context) -> None: + config = ctx.mig_context.config + allow_mismatch = config is not None and "1" == config.get_section_option( + "daf_butler_migrate_options", "allow_dimension_universe_mismatch" + ) + if not allow_mismatch: + _LOG.info("Checking that this is an unmodified daf_butler universe 5 repo") + try: + ctx.attributes.validate_dimensions_json(5) + except ValueError as e: + e.add_note( + "Repositories originally created at dimension universe 1 or earlier may have incorrect" + " documentation strings.\n" + "Re-run butler migrate with the flag '--options allow_dimension_universe_mismatch=1' to" + " bypass this check.\n" + "This will overwrite any customizations made to the dimension universe." + ) + raise + + +def _migrate_groups(ctx: _Context) -> None: + # Create group table + _LOG.info("Creating group table") + check_constraints = [] + if ctx.is_sqlite: + check_constraints = [make_string_length_constraint("instrument", 32, "group_len_instrument")] + table = op.create_table( + "group", + sa.Column("instrument", sa.String(32), primary_key=True), + sa.Column("name", sa.Text, primary_key=True), + sa.schema.ForeignKeyConstraint( + columns=["instrument"], + refcolumns=[ctx.get_table("instrument").c.name], + name="fkey_group_instrument_name_instrument", + ), + *check_constraints, + schema=ctx.schema, + ) + + # Populate group table based on the data in the exposure table. + _LOG.info("Populating group table") + exposure_table = ctx.get_table("exposure") + select = sa.select( + exposure_table.columns["instrument"], + exposure_table.columns["group_name"], + ).distinct() + op.execute( + table.insert().from_select( + [ + "instrument", + "name", + ], + select, + ) + ) + + # Create index on instrument + _LOG.info("Creating instrument index for group table") + op.create_index( + "group_fkidx_instrument", + "group", + ["instrument"], + schema=ctx.schema, + ) + + # Update the exposure table to reference the group table. + _LOG.info("Updating exposure table to reference group table") + with op.batch_alter_table("exposure", schema=ctx.schema) as batch_op: + batch_op.alter_column("group_name", new_column_name="group", nullable=False) + batch_op.drop_column("group_id") + + # In theory we should do this create_foreign_key as part of the batch + # above. 
However, there is some undocumented weirdness with the column + # rename from "group_name" to "group". When done in the batch above, this + # foreign key only works if you specify the original column name instead of + # the final one. This seems fragile (and is likely incompatible with + # Postgres, which ignores the batching). So do it in a separate batch. + with op.batch_alter_table("exposure", schema=ctx.schema) as batch_op: + batch_op.create_foreign_key( + constraint_name="fkey_exposure_group_instrument_name_instrument_group", + referent_table="group", + local_cols=["instrument", "group"], + remote_cols=["instrument", "name"], + referent_schema=ctx.schema, + ) + + # Create index on exposure for group fkey + op.create_index( + "exposure_fkidx_instrument_group", + "exposure", + ["instrument", "group"], + schema=ctx.schema, + ) + + +def _migrate_day_obs(ctx: _Context) -> None: + # Before doing anything else, generate the rows for the new day_obs table + # from the data in the exposure table. This is prone to failure due to the + # need to import instrument classes. + _LOG.info("Generating data for day_obs table from exposure_table") + day_obs_rows = list(_generate_day_obs_rows(ctx)) + + # Create day_obs table + _LOG.info("Creating day_obs table") + check_constraints = [] + if ctx.is_sqlite: + check_constraints = [make_string_length_constraint("instrument", 32, "day_obs_len_instrument")] + + table = op.create_table( + "day_obs", + sa.Column("instrument", sa.String(32), primary_key=True), + sa.Column("id", sa.BigInteger, primary_key=True, autoincrement=False), + *create_timespan_column_definitions("timespan", ctx.dialect), + sa.schema.ForeignKeyConstraint( + columns=["instrument"], + refcolumns=[ctx.get_table("instrument").c.name], + name="fkey_day_obs_instrument_name_instrument", + ), + *check_constraints, + schema=ctx.schema, + ) + + # Populate the day_obs table based on the data in the exposure table. + _LOG.info("Populating day_obs table") + op.bulk_insert(table, day_obs_rows) + + # Create index on instrument + _LOG.info("Creating instrument index for day_obs table") + op.create_index( + "day_obs_fkidx_instrument", + "day_obs", + ["instrument"], + schema=ctx.schema, + ) + + # Update exposure table to reference day_obs table + _LOG.info("Updating exposure table to reference day_obs table") + with op.batch_alter_table("exposure", schema=ctx.schema) as batch_op: + batch_op.alter_column("day_obs", nullable=False) + batch_op.create_foreign_key( + constraint_name="fkey_exposure_day_obs_instrument_id_instrument_day_obs", + referent_table="day_obs", + local_cols=["instrument", "day_obs"], + remote_cols=["instrument", "id"], + referent_schema=ctx.schema, + ) + + # Create index on exposure for day_obs fkey + op.create_index( + "exposure_fkidx_instrument_day_obs", + "exposure", + ["instrument", "day_obs"], + schema=ctx.schema, + ) + + +def _migrate_dimensions_json(ctx: _Context) -> None: + _LOG.info("Updating dimensions.json in ButlerAttributes") + ctx.attributes.replace_dimensions_json(6) + + +def _generate_day_obs_rows(ctx: _Context) -> Iterator[dict]: + exposure_table = ctx.get_table("exposure") + select = sa.select( + exposure_table.columns["instrument"], + exposure_table.columns["day_obs"], + ).distinct() + rows = ctx.bind.execute(select).all() + + instrument_fetcher = _InstrumentFetcher(ctx) + for row in rows: + day_obs = row.day_obs + + # Different instruments define the start and end times for day_obs differently. 
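+        # For example (hypothetical values): an instrument whose observing
+        # day runs noon-to-noon UTC rather than midnight-to-midnight would
+        # report a twelve-hour offset from its metadata translator, so
+        # _get_day_obs_offset() below would return 12 * 3600 and the
+        # timespan built by Timespan.from_day_obs would shift accordingly.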
+ instrument_name = row.instrument + instrument_class = instrument_fetcher.get_instrument(instrument_name) + offset = _get_day_obs_offset(instrument_name, instrument_class, day_obs) + + timespan = Timespan.from_day_obs(day_obs, offset) + yield { + "instrument": row.instrument, + "id": day_obs, + **format_timespan_value(timespan, "timespan", ctx.dialect), + } + + +def _get_day_obs_offset(instrument_name: str, instrument: _Instrument, day_obs: int) -> int: + day_as_astropy_time = Timespan.from_day_obs(day_obs, 0).begin + translator = instrument.translatorClass + if translator is None: + raise TypeError( + f"Instrument {instrument_name} does not have a translatorClass defined," + " cannot determine offset for day_obs." + ) + offset = translator.observing_date_to_offset(day_as_astropy_time) + # Convert astropy TimeDelta to integer seconds. + return round(offset.to_value("s")) + + +class _Context: + def __init__(self) -> None: + self.mig_context = alembic.context.get_context() + self.schema = self.mig_context.version_table_schema + bind = self.mig_context.bind + assert bind is not None, "Can't run offline -- need access to database to migrate data." + self.bind = bind + self.dialect = self.bind.dialect.name + self.is_sqlite = self.dialect == "sqlite" + self.metadata = sa.schema.MetaData(schema=self.schema) + self.attributes = ButlerAttributes(self.bind, self.schema) + + def get_table(self, table_name: str) -> sa.Table: + return sa.schema.Table(table_name, self.metadata, autoload_with=self.bind, schema=self.schema) + + +_Instrument: TypeAlias = Any +"""A dynamically loaded lsst.obs_base.Instrument.""" + + +class _InstrumentFetcher: + def __init__(self, ctx: _Context) -> None: + self._instruments: dict[str, _Instrument] = {} + self._ctx = ctx + + def get_instrument(self, name: str) -> _Instrument: + """Dynamically load an lsst.obs_base.Instrument based on its class name stored in the database.""" + instrument = self._instruments.get(name) + if instrument is not None: + return instrument + + instrument_table = self._ctx.get_table("instrument") + rows = self._ctx.bind.execute( + sa.select(instrument_table.c.class_name).where(instrument_table.c.name == name) + ).all() + assert len(rows) == 1, f"Should be exactly one class name for instrument {name}" + class_name = rows[0][0] + _LOG.info(f"Loading instrument definition {name} from class {class_name}") + instrument = doImportType(class_name)() + self._instruments[name] = instrument + return instrument diff --git a/mypy.ini b/mypy.ini index dfc246d..70507e0 100644 --- a/mypy.ini +++ b/mypy.ini @@ -9,6 +9,9 @@ disallow_incomplete_defs = True [mypy-sqlalchemy.*] ignore_missing_imports = True +[mypy-yaml.*] +ignore_missing_imports = True + [mypy-astropy.*] ignore_missing_imports = True diff --git a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py new file mode 100644 index 0000000..6a888f5 --- /dev/null +++ b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py @@ -0,0 +1,80 @@ +# This file is part of daf_butler_migrate. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import difflib +import json + + +def load_historical_dimension_universe_json(universe_version: int) -> str: + """Load a specific version of the default dimension universe as JSON. + + Parameters + ---------- + universe_version : `int` + Version number of the universe to be loaded. + + Returns + ------- + universe : `str` + Dimension universe configuration encoded as a JSON string. + """ + import yaml + from lsst.resources import ResourcePath + + path = ResourcePath( + f"resource://lsst.daf.butler/configs/old_dimensions/daf_butler_universe{universe_version}.yaml" + ) + with path.open() as input: + dimensions = yaml.safe_load(input) + return json.dumps(dimensions) + + +def compare_json_strings(expected: str, actual: str) -> str | None: + """Compare two JSON strings and return a human-readable description of + the differences. + + Parameters + ---------- + expected : `str` + JSON-encoded string to use as the basis for comparison. + actual : `str` + JSON-encoded string to compare with the expected value. + + Returns + ------- + diff : `str` | `None` + If the two inputs parse as equivalent data, returns `None`. If there + are differences between the two inputs, returns a human-readable string + describing the differences. + """ + expected = _normalize_json_string(expected) + actual = _normalize_json_string(actual) + + if expected == actual: + return None + + diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") + return "\n".join(diff) + + +def _normalize_json_string(json_string: str) -> str: + # Re-encode a JSON string in a standardized format with sorted keys. + return json.dumps(json.loads(json_string), indent=2, sort_keys=True) diff --git a/python/lsst/daf/butler_migrate/butler_attributes.py b/python/lsst/daf/butler_migrate/butler_attributes.py index 5b3bd8f..7b4b619 100644 --- a/python/lsst/daf/butler_migrate/butler_attributes.py +++ b/python/lsst/daf/butler_migrate/butler_attributes.py @@ -27,6 +27,10 @@ import sqlalchemy +from ._dimensions_json_utils import compare_json_strings, load_historical_dimension_universe_json + +_DIMENSIONS_JSON_KEY = "config:dimensions.json" + class ButlerAttributes: """Helper class implementing updates for butler_attributes table. @@ -163,12 +167,15 @@ def get_dimensions_json(self) -> dict[str, Any]: config : `dict` Contents of ``dimensions.json`` as dictionary. """ - key = "config:dimensions.json" + config = json.loads(self._load_dimensions_json()) + return config + + def _load_dimensions_json(self) -> str: + key = _DIMENSIONS_JSON_KEY config_json = self.get(key) if config_json is None: raise LookupError(f"Key {key} does not exist in attributes table") - config = json.loads(config_json) - return config + return config_json def update_dimensions_json(self, update_config: Callable[[dict], dict]) -> None: """Update dimensions definitions in dimensions.json. 
@@ -179,7 +186,7 @@ def update_dimensions_json(self, update_config: Callable[[dict], dict]) -> None: A method that takes a dictionary representation of the ``dimensions.json`` and returns an updated dictionary. """ - key = "config:dimensions.json" + key = _DIMENSIONS_JSON_KEY config_json = self.get(key) if config_json is None: raise LookupError(f"Key {key} does not exist in attributes table") @@ -190,3 +197,47 @@ def update_dimensions_json(self, update_config: Callable[[dict], dict]) -> None: config_json = json.dumps(config) self.update(key, config_json) + + def validate_dimensions_json(self, expected_universe_version: int) -> None: + """ + Compare the dimensions.json definition stored in the attributes table + with the default daf_butler dimensions.json at a specific version, and + raise an exception if they do not match. + + Parameters + ---------- + expected_universe_version : `int` + Version number of the daf_butler universe that we expect to find in + the DB. + + Raises + ------ + ValueError + If the dimension universe stored in the database does not match the + expected value. + """ + expected_json = load_historical_dimension_universe_json(expected_universe_version) + actual_json = self._load_dimensions_json() + diff = compare_json_strings(expected_json, actual_json) + if diff is not None: + err = ValueError( + "dimensions.json stored in database does not match expected" + f" daf_butler universe version {expected_universe_version}." + ) + err.add_note(f"Differences:\n\n{diff}") + raise err + + return None + + def replace_dimensions_json(self, universe_version: int) -> None: + """Replace the dimensions.json definition stored in the attributes + table to match the default daf_butler dimensions.json at a specific + version. + + Parameters + ---------- + universe_version : `int` + Version number for the daf_butler universe to be saved in the DB. + """ + dimensions = load_historical_dimension_universe_json(universe_version) + self.update(_DIMENSIONS_JSON_KEY, dimensions) diff --git a/python/lsst/daf/butler_migrate/naming.py b/python/lsst/daf/butler_migrate/naming.py index 3bf7eea..c7d9d79 100644 --- a/python/lsst/daf/butler_migrate/naming.py +++ b/python/lsst/daf/butler_migrate/naming.py @@ -32,13 +32,13 @@ from typing import TYPE_CHECKING +import sqlalchemy + from .shrink import shrinkDatabaseEntityName if TYPE_CHECKING: from collections.abc import Iterable - import sqlalchemy - def primary_key_name(table: str, bind: sqlalchemy.engine.Connection) -> str: """Return name of a primary key constraint for a table. @@ -180,3 +180,31 @@ def is_foreign_key_index(table: str, index_name: str) -> bool: def is_regular_index(table: str, index_name: str) -> bool: return index_name.startswith(f"{table}_idx_") + + +def make_string_length_constraint( + column_name: str, max_length: int, constraint_name: str +) -> sqlalchemy.schema.CheckConstraint: + """Create a check constraint that guarantees a string column has a length + that is non-zero and less than a specified maximum. + + These constraints are used by Butler in sqlite databases to emulate + VARCHARs with a specific length. + + Parameters + ---------- + column_name : `str` + The name of the column to create the constraint on. + max_length : `int` + The maximum length allowed for strings stored in this column. + constraint_name : `str` + An arbitrary identifier for the constraint. + + Returns + ------- + check_constraint : `sqlalchemy.schema.CheckConstraint` + The generated check constraint. 
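+
+    Examples
+    --------
+    A minimal sketch of the generated SQL; the column and constraint names
+    here are illustrative only:
+
+    >>> cc = make_string_length_constraint("instrument", 32, "len_instrument")
+    >>> str(cc.sqltext)
+    'length("instrument")<=32 AND length("instrument")>=1'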
+ """ + return sqlalchemy.schema.CheckConstraint( + f'length("{column_name}")<={max_length} AND length("{column_name}")>=1', name=constraint_name + ) diff --git a/python/lsst/daf/butler_migrate/timespan.py b/python/lsst/daf/butler_migrate/timespan.py new file mode 100644 index 0000000..22aa5a7 --- /dev/null +++ b/python/lsst/daf/butler_migrate/timespan.py @@ -0,0 +1,89 @@ +# This file is part of daf_butler_migrate. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +from typing import Any + +import sqlalchemy as sa +from lsst.daf.butler import Timespan +from sqlalchemy.dialects.postgresql import INT8RANGE, Range + + +def create_timespan_column_definitions(column_name: str, dialect: str) -> list[sa.Column]: + """Generate timespan column definitions for a given SQL dialect. + + Parameters + ---------- + column_name : `str` + The name of the column to generate, or the prefix if multiple columns + are generated. + dialect : `str` + The SQL dialect we are generating columns for (``sqlite`` or + ``postgres``). + + Returns + ------- + columns : `list` [ `sqlalchemy.Column` ] + SQLAlchemy column definitions. + """ + if dialect == "postgresql": + # Postgres uses a non-standard range datatype for representing + # timespans. + return [sa.Column(column_name, INT8RANGE)] + elif dialect == "sqlite": + return [ + sa.Column(f"{column_name}_begin", sa.BigInteger), + sa.Column(f"{column_name}_end", sa.BigInteger), + ] + else: + raise ValueError(f"Unhandled SQL dialect {dialect}") + + +def format_timespan_value(timespan: Timespan, column_name: str, dialect: str) -> dict[str, Any]: + """Format timespan values for insertion into a table using SQLAlchemy. + + Parameters + ---------- + timespan : `Timespan` + Value being formatted. + column_name : `str` + The name of the timespan column, or their prefix if the dialect uses + multiple columns. + dialect : `str` + The SQL dialect we are generating values for (``sqlite`` or + ``postgres``). + + Returns + ------- + values : `dict` [ `str`, `typing.Any` ] + Mapping from column name to value for that column. 
+ """ + nanoseconds = timespan.to_simple() + if dialect == "postgresql": + return {column_name: Range(*nanoseconds)} + elif dialect == "sqlite": + return { + f"{column_name}_begin": nanoseconds[0], + f"{column_name}_end": nanoseconds[1], + } + else: + raise ValueError(f"Unhandled SQL dialect {dialect}") diff --git a/tests/test_dimensions_json.py b/tests/test_dimensions_json.py index 5dbce4c..15ee3c2 100644 --- a/tests/test_dimensions_json.py +++ b/tests/test_dimensions_json.py @@ -27,7 +27,7 @@ from lsst.daf.butler.registry.sql_registry import SqlRegistry from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir from lsst.daf.butler.transfers import YamlRepoImportBackend -from lsst.daf.butler_migrate import database, migrate, script +from lsst.daf.butler_migrate import butler_attributes, database, migrate, script TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -200,6 +200,19 @@ def test_upgrade_v2(self) -> None: ], ) + def test_validate_dimensions_json(self) -> None: + self.make_butler_v0() + universe = 5 + with self.db.connect() as connection: + attribs = butler_attributes.ButlerAttributes(connection) + with self.assertRaisesRegex( + ValueError, "dimensions.json stored in database does not match expected" + ): + attribs.validate_dimensions_json(universe) + + attribs.replace_dimensions_json(universe) + self.assertIsNone(attribs.validate_dimensions_json(universe)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_dimensions_json_utils.py b/tests/test_dimensions_json_utils.py new file mode 100644 index 0000000..4106921 --- /dev/null +++ b/tests/test_dimensions_json_utils.py @@ -0,0 +1,59 @@ +# This file is part of daf_butler_migrate. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import json +import unittest + +from lsst.daf.butler_migrate._dimensions_json_utils import ( + compare_json_strings, + load_historical_dimension_universe_json, +) + + +class DimensionUtilsTestCase(unittest.TestCase): + """Test dimensions JSON utility functions.""" + + def test_universe_load(self) -> None: + self._check_universe_load(5) + self._check_universe_load(6) + + def _check_universe_load(self, version: int) -> None: + universe = load_historical_dimension_universe_json(version) + loaded_version_number = json.loads(universe)["version"] + self.assertEqual(loaded_version_number, version) + + def test_equal_json_strings(self) -> None: + a = '{ "a": {"b": 1, "c": 2}}' + b = '{ "a": {"c": 2, "b": 1}}' + self.assertIsNone(compare_json_strings(a, b)) + + def test_non_equal_json_strings(self) -> None: + a = '{ "a": 1 }' + b = '{ "a": {"c": 2, "b": 1}}' + diff = compare_json_strings(a, b) + self.assertEqual( + diff, + """--- \n+++ \n@@ -1,3 +1,6 @@\n {\n- "a": 1\n+ "a": {\n+ "b": 1,\n+ "c": 2\n+ }\n }""", + ) + + +if __name__ == "__main__": + unittest.main()
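
For reference, applying this migration to a repository is expected to look
roughly like the following. The repository path is a placeholder and the exact
positional syntax of ``butler migrate upgrade`` is an assumption here; the
``--options`` override comes from the error note in
``_validate_initial_dimension_universe``:

    butler migrate upgrade <repo> 1fae088c80b6
    butler migrate upgrade --options allow_dimension_universe_mismatch=1 <repo> 1fae088c80b6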