diff --git a/migrations/dimensions-config/1fae088c80b6.py b/migrations/dimensions-config/1fae088c80b6.py index 4372b71..c688fbd 100644 --- a/migrations/dimensions-config/1fae088c80b6.py +++ b/migrations/dimensions-config/1fae088c80b6.py @@ -84,8 +84,6 @@ def _validate_initial_dimension_universe(ctx: MigrationContext) -> None: ctx.attributes.validate_dimensions_json(5) except ValueError as e: e.add_note( - "Repositories originally created at dimension universe 1 or earlier may have incorrect" - " documentation strings.\n" "Re-run butler migrate with the flag '--options allow_dimension_universe_mismatch=1' to" " bypass this check.\n" "This will overwrite any customizations made to the dimension universe." diff --git a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py index 9f145ae..5fbd1c4 100644 --- a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py +++ b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py @@ -21,6 +21,7 @@ import difflib import json +from typing import Literal import yaml from lsst.resources import ResourcePath @@ -66,7 +67,9 @@ def load_historical_dimension_universe_json(universe_version: int) -> str: return json.dumps(dimensions) -def compare_json_strings(expected: str, actual: str) -> str | None: +def compare_json_strings( + expected: str, actual: str, diff_style: Literal["unified", "ndiff"] = "unified" +) -> str | None: """Compare two JSON strings and return a human-readable description of the differences. @@ -76,6 +79,8 @@ def compare_json_strings(expected: str, actual: str) -> str | None: JSON-encoded string to use as the basis for comparison. actual : `str` JSON-encoded string to compare with the expected value. + diff_style : "unified" | "ndiff" + What type of diff to return. Returns ------- @@ -90,7 +95,12 @@ def compare_json_strings(expected: str, actual: str) -> str | None: if expected == actual: return None - diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") + if diff_style == "unified": + diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") + elif diff_style == "ndiff": + diff = difflib.ndiff(expected.splitlines(), actual.splitlines()) + else: + raise ValueError(f"Unknown {diff_style=}") return "\n".join(diff) diff --git a/python/lsst/daf/butler_migrate/butler_attributes.py b/python/lsst/daf/butler_migrate/butler_attributes.py index 7b4b619..0451078 100644 --- a/python/lsst/daf/butler_migrate/butler_attributes.py +++ b/python/lsst/daf/butler_migrate/butler_attributes.py @@ -22,6 +22,7 @@ from __future__ import annotations import json +import re from collections.abc import Callable, Iterable from typing import Any, cast @@ -219,7 +220,7 @@ def validate_dimensions_json(self, expected_universe_version: int) -> None: expected_json = load_historical_dimension_universe_json(expected_universe_version) actual_json = self._load_dimensions_json() diff = compare_json_strings(expected_json, actual_json) - if diff is not None: + if diff is not None and not _is_expected_dimensions_json_mismatch(expected_json, actual_json): err = ValueError( "dimensions.json stored in database does not match expected" f" daf_butler universe version {expected_universe_version}." @@ -241,3 +242,33 @@ def replace_dimensions_json(self, universe_version: int) -> None: """ dimensions = load_historical_dimension_universe_json(universe_version) self.update(_DIMENSIONS_JSON_KEY, dimensions) + + +def _is_expected_dimensions_json_mismatch(expected_json: str, actual_json: str) -> bool: + """Return `True` if the two dimension universe JSON strings differ only in + ways expected because of the history of this data. Older repositories that + have been previously migrated have some documentation strings that don't + match the current dimension universe because of how dimension universes + were patched in earlier migrations. + """ + diff = compare_json_strings(expected_json, actual_json, diff_style="ndiff") + if diff is None: + return True + + return all(_is_expected_diff_line(line) for line in diff.splitlines()) + + +def _is_expected_diff_line(line: str) -> bool: + # ndiff prefix for matching lines and "hint" lines. + if line.startswith(" ") or line.startswith("? "): + return True + + # Lines containing only docstring changes. + if re.match(r'^[-+]\s+"doc":', line): + return True + + # Empty line. + if line.strip() == "": + return True + + return False diff --git a/tests/test_dimensions_json.py b/tests/test_dimensions_json.py index 15dbbfc..03a8815 100644 --- a/tests/test_dimensions_json.py +++ b/tests/test_dimensions_json.py @@ -328,6 +328,26 @@ def test_validate_dimensions_json(self) -> None: attribs.replace_dimensions_json(universe) attribs.validate_dimensions_json(universe) + def test_ignore_expected_dimension_json_mismatch(self) -> None: + original = '{"a": 1, "doc": "good"}' + self.assertTrue(butler_attributes._is_expected_dimensions_json_mismatch(original, original)) + # Mismatched doc but everything else OK + self.assertTrue( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "doc": "bad"}') + ) + # Order doesn't matter + self.assertTrue( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"doc": "bad", "a": 1}') + ) + # Mismatched non-doc value + self.assertFalse( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "good"}') + ) + # Mismatched value and doc + self.assertFalse( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "bad"}') + ) + class SQLiteDimensionsJsonTestCase(DimensionsJsonTestCase, unittest.TestCase): """Test using SQLite backend."""