Skip to content

Commit

Permalink
Add expression string syntax for null boolean columns
Browse files Browse the repository at this point in the history
Add support for `= NULL` and `!= NULL` syntax for boolean columns in query where strings.
  • Loading branch information
dhirving committed Aug 10, 2024
1 parent d740e47 commit ff10942
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 5 deletions.
38 changes: 37 additions & 1 deletion python/lsst/daf/butler/queries/_expression_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from .tree import (
BinaryExpression,
ColumnExpression,
ColumnReference,
ComparisonOperator,
LiteralValue,
Predicate,
Expand Down Expand Up @@ -158,6 +159,16 @@ def visitBinaryOp(
return Predicate.is_null(rhs.value)
case ["!=", _Null(), _ColExpr() as rhs]:
return Predicate.is_null(rhs.value).logical_not()
# Boolean columns can be null, but will have been converted to
# Predicate, so we need additional cases.
case ["=" | "!=", Predicate() as pred, _Null()] | ["=" | "!=", _Null(), Predicate() as pred]:
column_ref = _get_boolean_column_reference(pred)
if column_ref is not None:
match operator:
case "=":
return Predicate.is_null(column_ref)
case "!=":
return Predicate.is_null(column_ref).logical_not()

# Handle arithmetic operations
case [("+" | "-" | "*" | "/" | "%") as op, _ColExpr() as lhs, _ColExpr() as rhs]:
Expand Down Expand Up @@ -202,7 +213,16 @@ def visitIdentifier(self, name: str, node: Node) -> _VisitorResult:
if column_expression.column_type == "bool":
# Expression-handling code (in this file and elsewhere) expects
# boolean-valued expressions to be represented as Predicate, not a
# column expression.
# ColumnExpression.

# We should only be getting direct references to a column, not a
# more complicated expression.
# (Anything more complicated should be a Predicate already.)
assert (
column_expression.expression_type == "dataset_field"
or column_expression.expression_type == "dimension_field"
or column_expression.expression_type == "dimension_key"
)
return Predicate.from_bool_expression(column_expression)
else:
return _ColExpr(column_expression)
Expand Down Expand Up @@ -310,3 +330,19 @@ def _convert_in_clause_to_predicate(lhs: ColumnExpression, rhs: _VisitorResult,
return Predicate.is_null(lhs)
case _:
raise InvalidQueryError(f"Invalid IN expression: '{node!s}")


def _get_boolean_column_reference(predicate: Predicate) -> ColumnReference | None:
"""Unwrap a predicate to recover the boolean ColumnReference it contains.
Returns `None` if this Predicate contains anything other than a single
boolean ColumnReference operand.
This undoes the ColumnReference to Predicate conversion that occurs in
visitIdentifier for boolean columns.
"""
if len(predicate.operands) == 1 and len(predicate.operands[0]) == 1:
predicate_leaf = predicate.operands[0][0]
if predicate_leaf.predicate_type == "boolean_wrapper":
return predicate_leaf.operand

return None
9 changes: 5 additions & 4 deletions python/lsst/daf/butler/queries/tree/_predicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from ._base import QueryTreeBase
from ._column_expression import (
ColumnExpression,
ColumnReference,
is_one_datetime_and_one_ingest_date,
is_one_timespan_and_one_datetime,
)
Expand Down Expand Up @@ -156,9 +157,9 @@ def from_bool(cls, value: bool) -> Predicate:
return cls.model_construct(operands=() if value else ((),))

@classmethod
def from_bool_expression(cls, value: ColumnExpression) -> Predicate:
"""Construct a predicate that wraps a boolean ColumnExpression, taking
on the value of the underlying ColumnExpression.
def from_bool_expression(cls, value: ColumnReference) -> Predicate:
"""Construct a predicate that wraps a boolean ColumnReference, taking
on the value of the underlying ColumnReference.
Parameters
----------
Expand Down Expand Up @@ -437,7 +438,7 @@ class BooleanWrapper(PredicateLeafBase):

predicate_type: Literal["boolean_wrapper"] = "boolean_wrapper"

operand: ColumnExpression
operand: ColumnReference
"""Wrapped expression that will be used as the value for this predicate."""

def gather_required_columns(self, columns: ColumnSet) -> None:
Expand Down
12 changes: 12 additions & 0 deletions python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,18 @@ def _run_query(where: str) -> list[int]:
query_func("exposure.can_see_sky OR exposure = 2001"), [TRUE_ID, FALSE_ID_1]
)

# Find nulls and non-nulls. This is run only against the new query
# system. It appears that the `= NULL` syntax never had test coverage
# in the old query system and doesn't appear to work for any column
# types, not just bool. Not worth fixing since we are dropping that
# code soon.
nulls = [NULL_ID_1, NULL_ID_2]
non_nulls = [TRUE_ID, FALSE_ID_1, FALSE_ID_2]
self.assertCountEqual(_run_query("exposure.can_see_sky = NULL"), nulls)
self.assertCountEqual(_run_query("exposure.can_see_sky != NULL"), non_nulls)
self.assertCountEqual(_run_query("NULL = exposure.can_see_sky"), nulls)
self.assertCountEqual(_run_query("NULL != exposure.can_see_sky"), non_nulls)

# Test boolean columns in ExpressionFactory.
with butler._query() as query:
x = query.expression_factory
Expand Down

0 comments on commit ff10942

Please sign in to comment.