Skip to content

Commit

Permalink
Filter JSON instead of diff
Browse files Browse the repository at this point in the history
Instead of trying to filter out doc strings after running the diff, adjust the input to the diff to get the same effect.  This is a little less fragile.
  • Loading branch information
dhirving committed Jun 13, 2024
1 parent 0da4428 commit fe805b7
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 34 deletions.
14 changes: 2 additions & 12 deletions python/lsst/daf/butler_migrate/_dimensions_json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import difflib
import json
from typing import Literal

import yaml
from lsst.resources import ResourcePath
Expand Down Expand Up @@ -67,9 +66,7 @@ def load_historical_dimension_universe_json(universe_version: int) -> str:
return json.dumps(dimensions)


def compare_json_strings(
expected: str, actual: str, diff_style: Literal["unified", "ndiff"] = "unified"
) -> str | None:
def compare_json_strings(expected: str, actual: str) -> str | None:
"""Compare two JSON strings and return a human-readable description of
the differences.
Expand All @@ -79,8 +76,6 @@ def compare_json_strings(
JSON-encoded string to use as the basis for comparison.
actual : `str`
JSON-encoded string to compare with the expected value.
diff_style : "unified" | "ndiff"
What type of diff to return.
Returns
-------
Expand All @@ -95,12 +90,7 @@ def compare_json_strings(
if expected == actual:
return None

if diff_style == "unified":
diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="")
elif diff_style == "ndiff":
diff = difflib.ndiff(expected.splitlines(), actual.splitlines())
else:
raise ValueError(f"Unknown {diff_style=}")
diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="")
return "\n".join(diff)


Expand Down
37 changes: 20 additions & 17 deletions python/lsst/daf/butler_migrate/butler_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from __future__ import annotations

import json
import re
from collections.abc import Callable, Iterable
from typing import Any, cast

Expand Down Expand Up @@ -251,24 +250,28 @@ def _is_expected_dimensions_json_mismatch(expected_json: str, actual_json: str)
# don't match the current dimension universe because of how dimension
# universes were patched in earlier migrations.

diff = compare_json_strings(expected_json, actual_json, diff_style="ndiff")
if diff is None:
return True
diff = compare_json_strings(
_strip_doc_properties_from_json(expected_json), _strip_doc_properties_from_json(actual_json)
)
return diff is None

return all(_is_expected_diff_line(line) for line in diff.splitlines())

def _strip_doc_properties_from_json(json_string: str) -> str:
# Remove any properties named 'doc' from objects in the given JSON string.
dictionary = json.loads(json_string)
stripped = _strip_doc_properties_from_dict(dictionary)
return json.dumps(stripped)

def _is_expected_diff_line(line: str) -> bool:
# ndiff prefix for matching lines and "hint" lines.
if line.startswith(" ") or line.startswith("? "):
return True

# Lines containing only docstring changes.
if re.match(r'^[-+]\s+"doc":', line):
return True
def _strip_doc_properties_from_dict(dictionary: dict[str, object]) -> dict[str, object]:
# Recursively remove any properties named 'doc' from the given dict or any
# dicts in its values.
stripped: dict[str, object] = {}
for key, value in dictionary.items():
if key != "doc":
if isinstance(value, dict):
stripped[key] = _strip_doc_properties_from_dict(value)
else:
stripped[key] = value

# Empty line.
if line.strip() == "":
return True

return False
return stripped
12 changes: 7 additions & 5 deletions tests/test_dimensions_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,23 +329,25 @@ def test_validate_dimensions_json(self) -> None:
attribs.validate_dimensions_json(universe)

def test_ignore_expected_dimension_json_mismatch(self) -> None:
original = '{"a": 1, "doc": "good"}'
original = '{"a": 1, "b": {"doc": "good"}}'
self.assertTrue(butler_attributes._is_expected_dimensions_json_mismatch(original, original))
# Mismatched doc but everything else OK
self.assertTrue(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "doc": "bad"}')
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "b": {"doc": "bad"}}')
)
# Order doesn't matter
self.assertTrue(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"doc": "bad", "a": 1}')
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"b": {"doc": "bad"}, "a": 1}')
)
# Mismatched non-doc value
self.assertFalse(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "good"}')
butler_attributes._is_expected_dimensions_json_mismatch(
original, '{"a": 2, "b": {"doc": "good"}}'
)
)
# Mismatched value and doc
self.assertFalse(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "bad"}')
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "b": {"doc": "bad"}}')
)


Expand Down

0 comments on commit fe805b7

Please sign in to comment.