From 1eca829854cc7bbb1bf9d42e251dd60cc083af02 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Fri, 24 May 2024 15:42:22 -0700 Subject: [PATCH 1/3] Ignore documentation string mismatch in dimension validation Users are having trouble running universe v5 -> v6 migrations because of the error generated for mismatched dimension universes. There is a known mismatch of documentation strings in some older repositories that is spuriously throwing an error. Add a special case to ignore this known mismatch so that users don't have to add special options to complete the migration. --- migrations/dimensions-config/1fae088c80b6.py | 2 -- .../butler_migrate/_dimensions_json_utils.py | 14 ++++++-- .../daf/butler_migrate/butler_attributes.py | 33 ++++++++++++++++++- tests/test_dimensions_json.py | 20 +++++++++++ 4 files changed, 64 insertions(+), 5 deletions(-) diff --git a/migrations/dimensions-config/1fae088c80b6.py b/migrations/dimensions-config/1fae088c80b6.py index 4372b71..c688fbd 100644 --- a/migrations/dimensions-config/1fae088c80b6.py +++ b/migrations/dimensions-config/1fae088c80b6.py @@ -84,8 +84,6 @@ def _validate_initial_dimension_universe(ctx: MigrationContext) -> None: ctx.attributes.validate_dimensions_json(5) except ValueError as e: e.add_note( - "Repositories originally created at dimension universe 1 or earlier may have incorrect" - " documentation strings.\n" "Re-run butler migrate with the flag '--options allow_dimension_universe_mismatch=1' to" " bypass this check.\n" "This will overwrite any customizations made to the dimension universe." diff --git a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py index 9f145ae..5fbd1c4 100644 --- a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py +++ b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py @@ -21,6 +21,7 @@ import difflib import json +from typing import Literal import yaml from lsst.resources import ResourcePath @@ -66,7 +67,9 @@ def load_historical_dimension_universe_json(universe_version: int) -> str: return json.dumps(dimensions) -def compare_json_strings(expected: str, actual: str) -> str | None: +def compare_json_strings( + expected: str, actual: str, diff_style: Literal["unified", "ndiff"] = "unified" +) -> str | None: """Compare two JSON strings and return a human-readable description of the differences. @@ -76,6 +79,8 @@ def compare_json_strings(expected: str, actual: str) -> str | None: JSON-encoded string to use as the basis for comparison. actual : `str` JSON-encoded string to compare with the expected value. + diff_style : "unified" | "ndiff" + What type of diff to return. Returns ------- @@ -90,7 +95,12 @@ def compare_json_strings(expected: str, actual: str) -> str | None: if expected == actual: return None - diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") + if diff_style == "unified": + diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") + elif diff_style == "ndiff": + diff = difflib.ndiff(expected.splitlines(), actual.splitlines()) + else: + raise ValueError(f"Unknown {diff_style=}") return "\n".join(diff) diff --git a/python/lsst/daf/butler_migrate/butler_attributes.py b/python/lsst/daf/butler_migrate/butler_attributes.py index 7b4b619..878598e 100644 --- a/python/lsst/daf/butler_migrate/butler_attributes.py +++ b/python/lsst/daf/butler_migrate/butler_attributes.py @@ -22,6 +22,7 @@ from __future__ import annotations import json +import re from collections.abc import Callable, Iterable from typing import Any, cast @@ -219,7 +220,7 @@ def validate_dimensions_json(self, expected_universe_version: int) -> None: expected_json = load_historical_dimension_universe_json(expected_universe_version) actual_json = self._load_dimensions_json() diff = compare_json_strings(expected_json, actual_json) - if diff is not None: + if diff is not None and not _is_expected_dimensions_json_mismatch(expected_json, actual_json): err = ValueError( "dimensions.json stored in database does not match expected" f" daf_butler universe version {expected_universe_version}." @@ -241,3 +242,33 @@ def replace_dimensions_json(self, universe_version: int) -> None: """ dimensions = load_historical_dimension_universe_json(universe_version) self.update(_DIMENSIONS_JSON_KEY, dimensions) + + +def _is_expected_dimensions_json_mismatch(expected_json: str, actual_json: str) -> bool: + # Return `True` if the two dimension universe JSON strings differ only in + # ways expected because of the history of this data. Older repositories + # that have been previously migrated have some documentation strings that + # don't match the current dimension universe because of how dimension + # universes were patched in earlier migrations. + + diff = compare_json_strings(expected_json, actual_json, diff_style="ndiff") + if diff is None: + return True + + return all(_is_expected_diff_line(line) for line in diff.splitlines()) + + +def _is_expected_diff_line(line: str) -> bool: + # ndiff prefix for matching lines and "hint" lines. + if line.startswith(" ") or line.startswith("? "): + return True + + # Lines containing only docstring changes. + if re.match(r'^[-+]\s+"doc":', line): + return True + + # Empty line. + if line.strip() == "": + return True + + return False diff --git a/tests/test_dimensions_json.py b/tests/test_dimensions_json.py index 15dbbfc..03a8815 100644 --- a/tests/test_dimensions_json.py +++ b/tests/test_dimensions_json.py @@ -328,6 +328,26 @@ def test_validate_dimensions_json(self) -> None: attribs.replace_dimensions_json(universe) attribs.validate_dimensions_json(universe) + def test_ignore_expected_dimension_json_mismatch(self) -> None: + original = '{"a": 1, "doc": "good"}' + self.assertTrue(butler_attributes._is_expected_dimensions_json_mismatch(original, original)) + # Mismatched doc but everything else OK + self.assertTrue( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "doc": "bad"}') + ) + # Order doesn't matter + self.assertTrue( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"doc": "bad", "a": 1}') + ) + # Mismatched non-doc value + self.assertFalse( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "good"}') + ) + # Mismatched value and doc + self.assertFalse( + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "bad"}') + ) + class SQLiteDimensionsJsonTestCase(DimensionsJsonTestCase, unittest.TestCase): """Test using SQLite backend.""" From 0da44288d98f0b782d1bf067f42407198d27bdfa Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Fri, 24 May 2024 15:49:22 -0700 Subject: [PATCH 2/3] Fix build for new uv version In the new version of uv, they dropped support for the VIRTUAL_ENV environment variable. --- .github/workflows/build_docs.yaml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml index 0e0040a..59cac5c 100644 --- a/.github/workflows/build_docs.yaml +++ b/.github/workflows/build_docs.yaml @@ -25,25 +25,21 @@ jobs: - name: Install sqlite run: sudo apt-get install sqlite libyaml-dev - - name: Set the VIRTUAL_ENV variable for uv to work - run: | - echo "VIRTUAL_ENV=${Python_ROOT_DIR}" >> $GITHUB_ENV - - name: Update pip/wheel infrastructure run: | python -m pip install --upgrade pip pip install uv - uv pip install wheel + uv pip install --system wheel - name: Install dependencies run: | - uv pip install -r requirements.txt + uv pip install --system -r requirements.txt - name: Build and install - run: uv pip install --no-deps -v -e . + run: uv pip install --system --no-deps -v -e . - name: Install documenteer - run: uv pip install 'documenteer[pipelines]==0.8.2' + run: uv pip install --system 'documenteer[pipelines]==0.8.2' - name: Build documentation env: From fe805b778fec3900c40351117f51d91cf759dc72 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 13 Jun 2024 11:01:37 -0700 Subject: [PATCH 3/3] Filter JSON instead of diff Instead of trying to filter out doc strings after running the diff, adjust the input to the diff to get the same effect. This is a little less fragile. --- .../butler_migrate/_dimensions_json_utils.py | 14 +------ .../daf/butler_migrate/butler_attributes.py | 37 ++++++++++--------- tests/test_dimensions_json.py | 12 +++--- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py index 5fbd1c4..9f145ae 100644 --- a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py +++ b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py @@ -21,7 +21,6 @@ import difflib import json -from typing import Literal import yaml from lsst.resources import ResourcePath @@ -67,9 +66,7 @@ def load_historical_dimension_universe_json(universe_version: int) -> str: return json.dumps(dimensions) -def compare_json_strings( - expected: str, actual: str, diff_style: Literal["unified", "ndiff"] = "unified" -) -> str | None: +def compare_json_strings(expected: str, actual: str) -> str | None: """Compare two JSON strings and return a human-readable description of the differences. @@ -79,8 +76,6 @@ def compare_json_strings( JSON-encoded string to use as the basis for comparison. actual : `str` JSON-encoded string to compare with the expected value. - diff_style : "unified" | "ndiff" - What type of diff to return. Returns ------- @@ -95,12 +90,7 @@ def compare_json_strings( if expected == actual: return None - if diff_style == "unified": - diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") - elif diff_style == "ndiff": - diff = difflib.ndiff(expected.splitlines(), actual.splitlines()) - else: - raise ValueError(f"Unknown {diff_style=}") + diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") return "\n".join(diff) diff --git a/python/lsst/daf/butler_migrate/butler_attributes.py b/python/lsst/daf/butler_migrate/butler_attributes.py index 878598e..5f17bf3 100644 --- a/python/lsst/daf/butler_migrate/butler_attributes.py +++ b/python/lsst/daf/butler_migrate/butler_attributes.py @@ -22,7 +22,6 @@ from __future__ import annotations import json -import re from collections.abc import Callable, Iterable from typing import Any, cast @@ -251,24 +250,28 @@ def _is_expected_dimensions_json_mismatch(expected_json: str, actual_json: str) # don't match the current dimension universe because of how dimension # universes were patched in earlier migrations. - diff = compare_json_strings(expected_json, actual_json, diff_style="ndiff") - if diff is None: - return True + diff = compare_json_strings( + _strip_doc_properties_from_json(expected_json), _strip_doc_properties_from_json(actual_json) + ) + return diff is None - return all(_is_expected_diff_line(line) for line in diff.splitlines()) +def _strip_doc_properties_from_json(json_string: str) -> str: + # Remove any properties named 'doc' from objects in the given JSON string. + dictionary = json.loads(json_string) + stripped = _strip_doc_properties_from_dict(dictionary) + return json.dumps(stripped) -def _is_expected_diff_line(line: str) -> bool: - # ndiff prefix for matching lines and "hint" lines. - if line.startswith(" ") or line.startswith("? "): - return True - # Lines containing only docstring changes. - if re.match(r'^[-+]\s+"doc":', line): - return True +def _strip_doc_properties_from_dict(dictionary: dict[str, object]) -> dict[str, object]: + # Recursively remove any properties named 'doc' from the given dict or any + # dicts in its values. + stripped: dict[str, object] = {} + for key, value in dictionary.items(): + if key != "doc": + if isinstance(value, dict): + stripped[key] = _strip_doc_properties_from_dict(value) + else: + stripped[key] = value - # Empty line. - if line.strip() == "": - return True - - return False + return stripped diff --git a/tests/test_dimensions_json.py b/tests/test_dimensions_json.py index 03a8815..10c8824 100644 --- a/tests/test_dimensions_json.py +++ b/tests/test_dimensions_json.py @@ -329,23 +329,25 @@ def test_validate_dimensions_json(self) -> None: attribs.validate_dimensions_json(universe) def test_ignore_expected_dimension_json_mismatch(self) -> None: - original = '{"a": 1, "doc": "good"}' + original = '{"a": 1, "b": {"doc": "good"}}' self.assertTrue(butler_attributes._is_expected_dimensions_json_mismatch(original, original)) # Mismatched doc but everything else OK self.assertTrue( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "doc": "bad"}') + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "b": {"doc": "bad"}}') ) # Order doesn't matter self.assertTrue( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"doc": "bad", "a": 1}') + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"b": {"doc": "bad"}, "a": 1}') ) # Mismatched non-doc value self.assertFalse( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "good"}') + butler_attributes._is_expected_dimensions_json_mismatch( + original, '{"a": 2, "b": {"doc": "good"}}' + ) ) # Mismatched value and doc self.assertFalse( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "bad"}') + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "b": {"doc": "bad"}}') )