From fe805b778fec3900c40351117f51d91cf759dc72 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 13 Jun 2024 11:01:37 -0700 Subject: [PATCH] Filter JSON instead of diff Instead of trying to filter out doc strings after running the diff, adjust the input to the diff to get the same effect. This is a little less fragile. --- .../butler_migrate/_dimensions_json_utils.py | 14 +------ .../daf/butler_migrate/butler_attributes.py | 37 ++++++++++--------- tests/test_dimensions_json.py | 12 +++--- 3 files changed, 29 insertions(+), 34 deletions(-) diff --git a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py index 5fbd1c4..9f145ae 100644 --- a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py +++ b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py @@ -21,7 +21,6 @@ import difflib import json -from typing import Literal import yaml from lsst.resources import ResourcePath @@ -67,9 +66,7 @@ def load_historical_dimension_universe_json(universe_version: int) -> str: return json.dumps(dimensions) -def compare_json_strings( - expected: str, actual: str, diff_style: Literal["unified", "ndiff"] = "unified" -) -> str | None: +def compare_json_strings(expected: str, actual: str) -> str | None: """Compare two JSON strings and return a human-readable description of the differences. @@ -79,8 +76,6 @@ def compare_json_strings( JSON-encoded string to use as the basis for comparison. actual : `str` JSON-encoded string to compare with the expected value. - diff_style : "unified" | "ndiff" - What type of diff to return. Returns ------- @@ -95,12 +90,7 @@ def compare_json_strings( if expected == actual: return None - if diff_style == "unified": - diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") - elif diff_style == "ndiff": - diff = difflib.ndiff(expected.splitlines(), actual.splitlines()) - else: - raise ValueError(f"Unknown {diff_style=}") + diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="") return "\n".join(diff) diff --git a/python/lsst/daf/butler_migrate/butler_attributes.py b/python/lsst/daf/butler_migrate/butler_attributes.py index 878598e..5f17bf3 100644 --- a/python/lsst/daf/butler_migrate/butler_attributes.py +++ b/python/lsst/daf/butler_migrate/butler_attributes.py @@ -22,7 +22,6 @@ from __future__ import annotations import json -import re from collections.abc import Callable, Iterable from typing import Any, cast @@ -251,24 +250,28 @@ def _is_expected_dimensions_json_mismatch(expected_json: str, actual_json: str) # don't match the current dimension universe because of how dimension # universes were patched in earlier migrations. - diff = compare_json_strings(expected_json, actual_json, diff_style="ndiff") - if diff is None: - return True + diff = compare_json_strings( + _strip_doc_properties_from_json(expected_json), _strip_doc_properties_from_json(actual_json) + ) + return diff is None - return all(_is_expected_diff_line(line) for line in diff.splitlines()) +def _strip_doc_properties_from_json(json_string: str) -> str: + # Remove any properties named 'doc' from objects in the given JSON string. + dictionary = json.loads(json_string) + stripped = _strip_doc_properties_from_dict(dictionary) + return json.dumps(stripped) -def _is_expected_diff_line(line: str) -> bool: - # ndiff prefix for matching lines and "hint" lines. - if line.startswith(" ") or line.startswith("? "): - return True - # Lines containing only docstring changes. - if re.match(r'^[-+]\s+"doc":', line): - return True +def _strip_doc_properties_from_dict(dictionary: dict[str, object]) -> dict[str, object]: + # Recursively remove any properties named 'doc' from the given dict or any + # dicts in its values. + stripped: dict[str, object] = {} + for key, value in dictionary.items(): + if key != "doc": + if isinstance(value, dict): + stripped[key] = _strip_doc_properties_from_dict(value) + else: + stripped[key] = value - # Empty line. - if line.strip() == "": - return True - - return False + return stripped diff --git a/tests/test_dimensions_json.py b/tests/test_dimensions_json.py index 03a8815..10c8824 100644 --- a/tests/test_dimensions_json.py +++ b/tests/test_dimensions_json.py @@ -329,23 +329,25 @@ def test_validate_dimensions_json(self) -> None: attribs.validate_dimensions_json(universe) def test_ignore_expected_dimension_json_mismatch(self) -> None: - original = '{"a": 1, "doc": "good"}' + original = '{"a": 1, "b": {"doc": "good"}}' self.assertTrue(butler_attributes._is_expected_dimensions_json_mismatch(original, original)) # Mismatched doc but everything else OK self.assertTrue( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "doc": "bad"}') + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "b": {"doc": "bad"}}') ) # Order doesn't matter self.assertTrue( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"doc": "bad", "a": 1}') + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"b": {"doc": "bad"}, "a": 1}') ) # Mismatched non-doc value self.assertFalse( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "good"}') + butler_attributes._is_expected_dimensions_json_mismatch( + original, '{"a": 2, "b": {"doc": "good"}}' + ) ) # Mismatched value and doc self.assertFalse( - butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "bad"}') + butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "b": {"doc": "bad"}}') )