Skip to content

Commit

Permalink
Merge pull request #1054 from lsst/tickets/DM-45752
Browse files Browse the repository at this point in the history
DM-45752: Support region overlap against points
  • Loading branch information
dhirving authored Aug 14, 2024
2 parents 6ba7992 + 659e30c commit 05928f5
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 29 deletions.
4 changes: 4 additions & 0 deletions doc/changes/DM-45752.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Region overlap queries can now use points as regions. Points can be specified
as `region OVERLAPS POINT(ra, dec)`, or by binding an `lsst.sphgeom.LonLat` or
`astropy.coordinates.SkyCoord` value. (At the moment, this feature is only
available when using client/server Butler.)
37 changes: 20 additions & 17 deletions doc/lsst.daf.butler/queries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -292,38 +292,41 @@ Here are few examples for checking containment in a time range:
timestamp_id IN (interval.begin, interval.end)
range_id NOT IN (interval_id)
The same ``IN`` operator can be used for checking containment of a point or
region inside other region. Presently there are no special literal type for
regions, so this can only be done with regions represented by identifiers. Few
examples of region containment:

.. code-block:: sql
POINT(ra, dec) IN (region1)
region2 NOT IN (region1)
OVERLAPS operator
^^^^^^^^^^^^^^^^^

The ``OVERLAPS`` operator checks for overlapping time ranges or regions, its
arguments have to have consistent types. Like with ``IN`` operator time ranges
can be represented with a tuple of two timestamps (literals or identifiers) or
with a single identifier. Regions can only be used as identifiers.
``OVERLAPS`` syntax is similar to ``IN`` but it does not require parentheses
on right hand side when there is a single identifier representing a time range
or a region.
with a single identifier.


Few examples of the syntax:
Examples of the syntax for time ranges:

.. code-block:: sql
(T'2020-01-01', T'2022-01-01') OVERLAPS (T'2019-01-01', T'2021-01-01')
(interval.begin, interval.end) OVERLAPS interval_2
interval_1 OVERLAPS interval_2
NOT (region_1 OVERLAPS region_2)
You can check for overlap of a region with a point using the ``POINT(ra, dec)`` syntax,
where ``ra`` and ``dec`` are specified as an ICRS sky position in degrees.

.. code-block:: sql
visit.region OVERLAPS POINT(53.6, -32.7)
You can check overlaps with arbitrary sky regions by binding values (see
:ref:`_daf_butler_dimension_expressions_identifiers`). Bound region values may
be specified as the following object types:

- ``lsst.sphgeom.Region``
- ``lsst.sphgeom.LonLat``
- ``astropy.coordinates.SkyCoord``

.. code-block:: sql
visit.region OVERLAPS my_region
Boolean operators
^^^^^^^^^^^^^^^^^
Expand Down
26 changes: 23 additions & 3 deletions python/lsst/daf/butler/queries/_expression_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@
from typing import Literal, NamedTuple, TypeAlias

import astropy.time
import lsst.sphgeom

from .._exceptions import InvalidQueryError
from .._timespan import Timespan
from ..column_spec import ColumnType
from ..dimensions import DimensionUniverse
from ..registry.queries.expressions.categorize import ExpressionConstant, categorizeConstant
from ..registry.queries.expressions.parser import Node, RangeLiteral, TreeVisitor, parse_expression
from ..registry.queries.expressions.parser import Node, PointNode, RangeLiteral, TreeVisitor, parse_expression
from ._identifiers import IdentifierContext, interpret_identifier
from .tree import (
BinaryExpression,
Expand Down Expand Up @@ -239,8 +240,12 @@ def visitNumericLiteral(self, value: str, node: Node) -> _VisitorResult:
def visitParens(self, expression: _VisitorResult, node: Node) -> _VisitorResult:
return expression

def visitPointNode(self, ra: _VisitorResult, dec: _VisitorResult, node: Node) -> _VisitorResult:
raise NotImplementedError("POINT() function is not supported yet")
def visitPointNode(self, ra: _VisitorResult, dec: _VisitorResult, node: PointNode) -> _VisitorResult:
ra_value = _get_float_literal_value(ra, node.ra)
dec_value = _get_float_literal_value(dec, node.dec)

lon_lat = lsst.sphgeom.LonLat.fromDegrees(ra_value, dec_value)
return _make_literal(lon_lat)

def visitRangeLiteral(
self, start: int, stop: int, stride: int | None, node: RangeLiteral
Expand Down Expand Up @@ -346,3 +351,18 @@ def _get_boolean_column_reference(predicate: Predicate) -> ColumnReference | Non
return predicate_leaf.operand

return None


def _get_float_literal_value(value: _VisitorResult, node: Node) -> float:
"""If the given ``value`` is a literal `float` or `int` expression, return
it as a float. Otherwise raise an `InvalidQueryError` identifying a
problem with the given ``node``.
"""
if isinstance(value, _ColExpr):
expr = value.value
if expr.expression_type == "float":
return expr.value
elif expr.expression_type == "int":
return float(expr.value)

raise InvalidQueryError(f"Expression '{node}' in POINT() is not a literal number.")
43 changes: 38 additions & 5 deletions python/lsst/daf/butler/queries/tree/_column_literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,26 @@
from functools import cached_property
from typing import Literal, TypeAlias, Union, final

import astropy.coordinates
import astropy.time
import erfa
from lsst.sphgeom import Region
import lsst.sphgeom

from ..._timespan import Timespan
from ...time_utils import TimeConverter
from ._base import ColumnLiteralBase

LiteralValue: TypeAlias = (
int | str | float | bytes | uuid.UUID | astropy.time.Time | datetime.datetime | Timespan | Region
int
| str
| float
| bytes
| uuid.UUID
| astropy.time.Time
| datetime.datetime
| Timespan
| lsst.sphgeom.Region
| lsst.sphgeom.LonLat
)


Expand Down Expand Up @@ -310,10 +320,10 @@ class RegionColumnLiteral(ColumnLiteralBase):
@cached_property
def value(self) -> bytes:
"""The wrapped value."""
return Region.decode(b64decode(self.encoded))
return lsst.sphgeom.Region.decode(b64decode(self.encoded))

@classmethod
def from_value(cls, value: Region) -> RegionColumnLiteral:
def from_value(cls, value: lsst.sphgeom.Region) -> RegionColumnLiteral:
"""Construct from the wrapped value.
Parameters
Expand Down Expand Up @@ -374,6 +384,29 @@ def make_column_literal(value: LiteralValue) -> ColumnLiteral:
return DateTimeColumnLiteral.from_value(astropy.time.Time(value))
case Timespan():
return TimespanColumnLiteral.from_value(value)
case Region():
case lsst.sphgeom.Region():
return RegionColumnLiteral.from_value(value)
case lsst.sphgeom.LonLat():
return _make_region_literal_from_lonlat(value)
case astropy.coordinates.SkyCoord():
icrs = value.transform_to("icrs")
if not icrs.isscalar:
raise ValueError(
"Astropy SkyCoord contained an array of points,"
f" but it should be only a single point: {value}"
)

ra = icrs.ra.degree
dec = icrs.dec.degree
lon_lat = lsst.sphgeom.LonLat.fromDegrees(ra, dec)
return _make_region_literal_from_lonlat(lon_lat)

raise TypeError(f"Invalid type {type(value).__name__!r} of value {value!r} for column literal.")


def _make_region_literal_from_lonlat(point: lsst.sphgeom.LonLat) -> RegionColumnLiteral:
vec = lsst.sphgeom.UnitVector3d(point)
# Convert the point to a Region by representing it as a zero-radius
# Circle.
region = lsst.sphgeom.Circle(vec)
return RegionColumnLiteral.from_value(region)
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"IsIn",
"NumericLiteral",
"Parens",
"PointNode",
"RangeLiteral",
"StringLiteral",
"TimeLiteral",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
if TYPE_CHECKING:
import astropy.time

from .exprTree import Node, RangeLiteral
from .exprTree import Node, PointNode, RangeLiteral


T = TypeVar("T")
Expand Down Expand Up @@ -228,7 +228,7 @@ def visitFunctionCall(self, name: str, args: list[T], node: Node) -> T:
raise ValueError(f"Unknown function '{name}' in expression")

@abstractmethod
def visitPointNode(self, ra: T, dec: T, node: Node) -> T:
def visitPointNode(self, ra: T, dec: T, node: PointNode) -> T:
"""Visit PointNode node.
Parameters
Expand Down
93 changes: 91 additions & 2 deletions python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@
from typing import ClassVar
from uuid import UUID

import astropy.coordinates
import astropy.time
from lsst.sphgeom import Region
from lsst.sphgeom import LonLat, Region

from .._butler import Butler
from .._collection_type import CollectionType
Expand All @@ -47,7 +48,7 @@
from .._timespan import Timespan
from ..dimensions import DataCoordinate, DimensionGroup, DimensionRecord
from ..direct_query_driver import DirectQueryDriver
from ..queries import DimensionRecordQueryResults
from ..queries import DimensionRecordQueryResults, Query
from ..queries.tree import Predicate
from ..registry import NoDefaultCollectionError, RegistryDefaults
from ..registry.sql_registry import SqlRegistry
Expand Down Expand Up @@ -598,6 +599,94 @@ def test_spatial_overlaps(self) -> None:
has_postprocessing=True,
)

# Check spatial queries using points instead of regions.
# This (ra, dec) is a point in the center of the region for visit
# 1, detector 3.
ra = 0.25209391431545386 # degrees
dec = 0.9269112711026793 # degrees

def _check_visit_id(query: Query) -> None:
result = list(query.data_ids(["visit", "detector"]))
self.assertEqual(len(result), 1)
id = result[0]
self.assertEqual(id["visit"], 1)
self.assertEqual(id["detector"], 3)

# Basic POINT() syntax.
_check_visit_id(query.where(f"visit_detector_region.region OVERLAPS POINT({ra}, {dec})"))
_check_visit_id(query.where(f"POINT({ra}, {dec}) OVERLAPS visit_detector_region.region"))

# dec of 1 is close enough to still be in the region, and tests
# conversion of integer to float.
_check_visit_id(query.where(f"visit_detector_region.region OVERLAPS POINT({ra}, 1)"))

# Substitute ra and dec values via bind instead of literals in the
# string.
_check_visit_id(
query.where(
"visit_detector_region.region OVERLAPS POINT(ra, dec)", bind={"ra": ra, "dec": dec}
)
)

# Bind in a point object instead of specifying ra/dec separately.
_check_visit_id(
query.where(
"visit_detector_region.region OVERLAPS my_point",
bind={"my_point": LonLat.fromDegrees(ra, dec)},
)
)
_check_visit_id(
query.where(
"visit_detector_region.region OVERLAPS my_point",
bind={"my_point": astropy.coordinates.SkyCoord(ra, dec, frame="icrs", unit="deg")},
)
)
# Make sure alternative coordinate frames in astropy SkyCoord are
# handled.
_check_visit_id(
query.where(
"visit_detector_region.region OVERLAPS my_point",
bind={
"my_point": astropy.coordinates.SkyCoord(
ra, dec, frame="icrs", unit="deg"
).transform_to("galactic")
},
)
)

# Compare against literal values using ExpressionFactory.
_check_visit_id(
query.where(_x.visit_detector_region.region.overlaps(LonLat.fromDegrees(ra, dec)))
)
_check_visit_id(
query.where(
_x.visit_detector_region.region.overlaps(
astropy.coordinates.SkyCoord(ra, dec, frame="icrs", unit="deg")
)
)
)

# Check errors for invalid syntax.
with self.assertRaisesRegex(
InvalidQueryError, r"Expression 'visit.id' in POINT\(\) is not a literal number."
):
query.where(f"visit_detector_region.region OVERLAPS POINT(visit.id, {dec})"),
with self.assertRaisesRegex(
InvalidQueryError, r"Expression ''not-a-number'' in POINT\(\) is not a literal number."
):
query.where(f"visit_detector_region.region OVERLAPS POINT({ra}, 'not-a-number')")

# astropy's SkyCoord can be array-valued, but we expect only a
# single point.
array_point = astropy.coordinates.SkyCoord(
ra=[10, 11, 12, 13], dec=[41, -5, 42, 0], unit="deg", frame="icrs"
)
with self.assertRaisesRegex(ValueError, "Astropy SkyCoord contained an array of points"):
query.where(
"visit_detector_region.region OVERLAPS my_point",
bind={"my_point": array_point},
)

def test_common_skypix_overlaps(self) -> None:
"""Test spatial overlap queries that return htm7 records."""
butler = self.make_butler("base.yaml", "spatial.yaml")
Expand Down

0 comments on commit 05928f5

Please sign in to comment.