Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-38084: update docs and exception messages to improve timespan-querying usability #865

Merged
merged 3 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/changes/DM-38084.doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix docs on referring to timespans in queries, and make related error messages more helpful.
6 changes: 3 additions & 3 deletions doc/lsst.daf.butler/queries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -425,11 +425,11 @@ Few examples of valid expressions using some of the constructs:

(visit = 100 OR visit = 101) AND exposure % 2 = 1

visit.datetime_begin > T'2020-03-30 12:20:33'
visit.timespan.begin > T'2020-03-30 12:20:33'

exposure.datetime_begin > T'58938.515'
exposure.timespan.begin > T'58938.515'

visit.datetime_end < T'mjd/58938.515/tai'
visit.timespan.end < T'mjd/58938.515/tai'

ingest_date < T'2020-11-06 21:10:00'

Expand Down
8 changes: 5 additions & 3 deletions python/lsst/daf/butler/cli/opt/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,11 @@ def _config_split(*args: Any) -> dict[str, str]:
"--order-by",
help=unwrap(
"""One or more comma-separated names used to order records. Names can be dimension names,
metadata names optionally prefixed by a dimension name and dot, or
timestamp_begin/timestamp_end (with optional dimension name). To reverse ordering for a name
prefix it with a minus sign."""
metadata field names, or "timespan.begin" / "timespan.end" for temporal dimensions.
In some cases the dimension for a metadata field or timespan bound can be inferred, but usually
qualifying these with "<dimension>.<field>" is necessary.
To reverse ordering for a name, prefix it with a minus sign.
"""
),
multiple=True,
callback=split_commas,
Expand Down
33 changes: 24 additions & 9 deletions python/lsst/daf/butler/registry/queries/expressions/categorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,17 @@ def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[Dimensi
so at least its message should generally be propagated up to a context
where the error can be interpreted by a human.
"""
table, sep, column = name.partition(".")
table, _, column = name.partition(".")
if column:
try:
element = universe[table]
except KeyError as err:
raise LookupError(f"No dimension element with name '{table}'.") from err
except KeyError:
if table == "timespan" or table == "datetime" or table == "timestamp":
raise LookupError(
"Dimension element name cannot be inferred in this context; "
f"use <dimension>.timespan.{column} instead."
) from None
raise LookupError(f"No dimension element with name {table!r} in {name!r}.") from None
if isinstance(element, Dimension) and column == element.primaryKey.name:
# Allow e.g. "visit.id = x" instead of just "visit = x"; this
# can be clearer.
Expand Down Expand Up @@ -172,8 +177,9 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl
field_name = name
elif len(matches) > 1:
raise ValueError(
f"Timespan exists in more than one dimesion element: {matches},"
" qualify timespan with specific dimension name."
"Timespan exists in more than one dimension element "
f"({', '.join(element.name for element in matches)}); "
"qualify timespan with specific dimension name."
)
else:
raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
Expand All @@ -197,16 +203,21 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl
field_name = name
elif len(match_pairs) > 1:
raise ValueError(
f"Metadata '{name}' exists in more than one dimension element: "
f"{[element for element, _ in match_pairs]}, qualify metadata name with dimension name."
f"Metadata '{name}' exists in more than one dimension element "
f"({', '.join(element.name for element, _ in match_pairs)}); "
"qualify field name with dimension name."
)
else:
raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
else:
# qualified name, must be a dimension element and a field
elem_name, _, field_name = name.partition(".")
if elem_name not in graph.elements.names:
raise ValueError(f"Unknown dimension element name '{elem_name}'")
if field_name == "begin" or field_name == "end":
raise ValueError(
f"Unknown dimension element {elem_name!r}; perhaps you meant 'timespan.{field_name}'?"
)
raise ValueError(f"Unknown dimension element {elem_name!r}.")
element = graph.elements[elem_name]
if field_name in ("timespan.begin", "timespan.end"):
if not element.temporal:
Expand Down Expand Up @@ -289,7 +300,11 @@ def categorizeElementOrderByName(element: DimensionElement, name: str) -> str |
# qualified name, must be a dimension element and a field
elem_name, _, field_name = name.partition(".")
if elem_name != element.name:
raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'")
if field_name == "begin" or field_name == "end":
extra = f"; perhaps you meant 'timespan.{field_name}'?"
else:
extra = "."
raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'{extra}")
if field_name in ("timespan.begin", "timespan.end"):
if not element.temporal:
raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
Expand Down
33 changes: 31 additions & 2 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2765,6 +2765,18 @@ def testQueryResultSummaries(self):
self.assertTrue(messages)
self.assertTrue(any("no-purpose" in message for message in messages))

def testQueryDataIdsExpressionError(self):
"""Test error checking of 'where' expressions in queryDataIds."""
registry = self.makeRegistry()
self.loadData(registry, "base.yaml")
bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
registry.queryDataIds(["detector"], where="foo.bar = 12")
with self.assertRaisesRegex(
LookupError, "Dimension element name cannot be inferred in this context."
):
registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)

def testQueryDataIdsOrderBy(self):
"""Test order_by and limit on result returned by queryDataIds()."""
registry = self.makeRegistry()
Expand Down Expand Up @@ -2858,7 +2870,7 @@ def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
list(do_query().order_by(order_by))

for order_by in ("undimension.name", "-undimension.name"):
with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
list(do_query().order_by(order_by))

for order_by in ("attract", "-attract"):
Expand All @@ -2868,7 +2880,11 @@ def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
list(do_query(("exposure", "visit")).order_by("exposure_time"))

with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
with self.assertRaisesRegex(
ValueError,
r"Timespan exists in more than one dimension element \(exposure, visit\); "
r"qualify timespan with specific dimension name\.",
):
list(do_query(("exposure", "visit")).order_by("timespan.begin"))

with self.assertRaisesRegex(
Expand All @@ -2882,6 +2898,11 @@ def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
list(do_query("tract").order_by("tract.name"))

with self.assertRaisesRegex(
ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
):
list(do_query("visit").order_by("timestamp.begin"))

def testQueryDataIdsGovernorExceptions(self):
"""Test exceptions raised by queryDataIds() for incorrect governors."""
registry = self.makeRegistry()
Expand Down Expand Up @@ -3001,6 +3022,14 @@ def do_query(element, datasets=None, collections=None):
with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
list(do_query("detector").order_by(order_by))

for order_by in ("timestamp.begin", "-timestamp.begin"):
with self.assertRaisesRegex(
ValueError,
r"Element name mismatch: 'timestamp' instead of 'visit'; "
r"perhaps you meant 'timespan.begin'\?",
):
list(do_query("visit").order_by(order_by))

def testQueryDimensionRecordsExceptions(self):
"""Test exceptions raised by queryDimensionRecords()."""
registry = self.makeRegistry()
Expand Down