Skip to content

Commit

Permalink
Ignore documentation string mismatch in dimension validation
Browse files Browse the repository at this point in the history
Users are having trouble running universe v5 -> v6 migrations because of the error generated for mismatched dimension universes.  There is a known mismatch of documentation strings in some older repositories that is spuriously throwing an error.

Add a special case to ignore this known mismatch so that users don't have to add special options to complete the migration.
  • Loading branch information
dhirving committed May 24, 2024
1 parent 7afde34 commit a720a1e
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 5 deletions.
2 changes: 0 additions & 2 deletions migrations/dimensions-config/1fae088c80b6.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ def _validate_initial_dimension_universe(ctx: MigrationContext) -> None:
ctx.attributes.validate_dimensions_json(5)
except ValueError as e:
e.add_note(
"Repositories originally created at dimension universe 1 or earlier may have incorrect"
" documentation strings.\n"
"Re-run butler migrate with the flag '--options allow_dimension_universe_mismatch=1' to"
" bypass this check.\n"
"This will overwrite any customizations made to the dimension universe."
Expand Down
14 changes: 12 additions & 2 deletions python/lsst/daf/butler_migrate/_dimensions_json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import difflib
import json
from typing import Literal

import yaml
from lsst.resources import ResourcePath
Expand Down Expand Up @@ -66,7 +67,9 @@ def load_historical_dimension_universe_json(universe_version: int) -> str:
return json.dumps(dimensions)


def compare_json_strings(expected: str, actual: str) -> str | None:
def compare_json_strings(
expected: str, actual: str, diff_style: Literal["unified", "ndiff"] = "unified"
) -> str | None:
"""Compare two JSON strings and return a human-readable description of
the differences.
Expand All @@ -76,6 +79,8 @@ def compare_json_strings(expected: str, actual: str) -> str | None:
JSON-encoded string to use as the basis for comparison.
actual : `str`
JSON-encoded string to compare with the expected value.
diff_style : "unified" | "ndiff"
What type of diff to return.
Returns
-------
Expand All @@ -90,7 +95,12 @@ def compare_json_strings(expected: str, actual: str) -> str | None:
if expected == actual:
return None

diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="")
if diff_style == "unified":
diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="")
elif diff_style == "ndiff":
diff = difflib.ndiff(expected.splitlines(), actual.splitlines())
else:
raise ValueError(f"Unknown {diff_style=}")
return "\n".join(diff)


Expand Down
33 changes: 32 additions & 1 deletion python/lsst/daf/butler_migrate/butler_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from __future__ import annotations

import json
import re
from collections.abc import Callable, Iterable
from typing import Any, cast

Expand Down Expand Up @@ -219,7 +220,7 @@ def validate_dimensions_json(self, expected_universe_version: int) -> None:
expected_json = load_historical_dimension_universe_json(expected_universe_version)
actual_json = self._load_dimensions_json()
diff = compare_json_strings(expected_json, actual_json)
if diff is not None:
if diff is not None and not _is_expected_dimensions_json_mismatch(expected_json, actual_json):
err = ValueError(
"dimensions.json stored in database does not match expected"
f" daf_butler universe version {expected_universe_version}."
Expand All @@ -241,3 +242,33 @@ def replace_dimensions_json(self, universe_version: int) -> None:
"""
dimensions = load_historical_dimension_universe_json(universe_version)
self.update(_DIMENSIONS_JSON_KEY, dimensions)


def _is_expected_dimensions_json_mismatch(expected_json: str, actual_json: str) -> bool:
"""Return `True` if the two dimension universe JSON strings differ only in
ways expected because of the history of this data. Older repositories that
have been previously migrated have some documentation strings that don't
match the current dimension universe because of how dimension universes
were patched in earlier migrations.
"""
diff = compare_json_strings(expected_json, actual_json, diff_style="ndiff")
if diff is None:
return True

return all(_is_expected_diff_line(line) for line in diff.splitlines())


def _is_expected_diff_line(line: str) -> bool:
# ndiff prefix for matching lines and "hint" lines.
if line.startswith(" ") or line.startswith("? "):
return True

# Lines containing only docstring changes.
if re.match(r'^[-+]\s+"doc":', line):
return True

# Empty line.
if line.strip() == "":
return True

return False
20 changes: 20 additions & 0 deletions tests/test_dimensions_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,26 @@ def test_validate_dimensions_json(self) -> None:
attribs.replace_dimensions_json(universe)
attribs.validate_dimensions_json(universe)

def test_ignore_expected_dimension_json_mismatch(self) -> None:
original = '{"a": 1, "doc": "good"}'
self.assertTrue(butler_attributes._is_expected_dimensions_json_mismatch(original, original))
# Mismatched doc but everything else OK
self.assertTrue(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "doc": "bad"}')
)
# Order doesn't matter
self.assertTrue(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"doc": "bad", "a": 1}')
)
# Mismatched non-doc value
self.assertFalse(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "good"}')
)
# Mismatched value and doc
self.assertFalse(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "bad"}')
)


class SQLiteDimensionsJsonTestCase(DimensionsJsonTestCase, unittest.TestCase):
"""Test using SQLite backend."""
Expand Down

0 comments on commit a720a1e

Please sign in to comment.