Skip to content

Commit

Permalink
Merge pull request #1060 from lsst/tickets/DM-45764
Browse files Browse the repository at this point in the history
DM-45764: Return ingest_date as astropy time from general query
  • Loading branch information
andy-slac authored Aug 19, 2024
2 parents d09db1e + 6366d3b commit 028a2be
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 34 deletions.
35 changes: 1 addition & 34 deletions python/lsst/daf/butler/column_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
"COLLECTION_NAME_MAX_LENGTH",
)

import datetime
import textwrap
import uuid
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -153,38 +152,6 @@ def deserialize(self, value: Any) -> Any:
return value if value is None else self._type_adapter.validate_python(value)


class _DateTimeColumnValueSerializer(ColumnValueSerializer):
    """Serializer for the ``ingest_time`` column.

    Depending on schema version, the column value arrives either as a
    native database timestamp (`datetime.datetime`) or as integer TAI
    nanoseconds (`astropy.time.Time`).  Astropy times are serialized
    through a pydantic type adapter into integer nanoseconds, while
    datetimes are serialized to their ISO string form so the two
    representations stay distinguishable on the wire (timezone handling
    depends entirely on what the database returns).
    """

    def __init__(self) -> None:
        self._astropy_adapter = pydantic.TypeAdapter(SerializableTime)

    def serialize(self, value: Any) -> Any:
        # Docstring inherited.
        if value is None:
            return None
        if isinstance(value, datetime.datetime):
            # ISO string marks this as a native database timestamp.
            return value.isoformat()
        return self._astropy_adapter.dump_python(value)

    def deserialize(self, value: Any) -> Any:
        # Docstring inherited.
        if value is None:
            return None
        if isinstance(value, str):
            # Strings round-trip back into native datetimes.
            return datetime.datetime.fromisoformat(value)
        return self._astropy_adapter.validate_python(value)


class _BaseColumnSpec(pydantic.BaseModel, ABC):
"""Base class for descriptions of table columns."""

Expand Down Expand Up @@ -459,7 +426,7 @@ def to_arrow(self) -> arrow_utils.ToArrow:

def serializer(self) -> ColumnValueSerializer:
# Docstring inherited.
return _DateTimeColumnValueSerializer()
return _TypeAdapterColumnValueSerializer(pydantic.TypeAdapter(SerializableTime))


ColumnSpec = Annotated[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@

from __future__ import annotations

import datetime
from abc import abstractmethod
from collections.abc import Iterable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import astropy.time
import sqlalchemy

from .._dataset_ref import DatasetRef
Expand Down Expand Up @@ -331,6 +333,8 @@ def __init__(self, spec: GeneralResultSpec, ctx: ResultPageConverterContext) ->
column_name = qt.ColumnSet.get_qualified_name(column.logical_table, column.field)
if column.field == TimespanDatabaseRepresentation.NAME:
self.converters.append(_TimespanGeneralColumnConverter(column_name, ctx.db))
elif column.field == "ingest_date":
self.converters.append(_TimestampGeneralColumnConverter(column_name))
else:
self.converters.append(_DefaultGeneralColumnConverter(column_name))

Expand Down Expand Up @@ -377,6 +381,29 @@ def convert(self, row: sqlalchemy.Row) -> Any:
return row._mapping[self.name]


class _TimestampGeneralColumnConverter(_GeneralColumnConverter):
    """Converter that turns native ``datetime`` values into astropy Time.

    Only the ``dataset.ingest_date`` column used native database
    timestamps in the initial schema version; newer schema versions use
    the common nanoseconds-since-epoch representation for that column.
    For both schema versions clients receive an astropy time.

    Parameters
    ----------
    name : `str`
        Column name.
    """

    def __init__(self, name: str):
        self.name = name

    def convert(self, row: sqlalchemy.Row) -> Any:
        # Docstring inherited.
        raw = row._mapping[self.name]
        if not isinstance(raw, datetime.datetime):
            # Already in the nanoseconds-based representation.
            return raw
        # Treat the native timestamp as UTC and convert to TAI to match
        # the representation used elsewhere.
        return astropy.time.Time(raw, scale="utc").tai


class _TimespanGeneralColumnConverter(_GeneralColumnConverter):
"""Converter that extracts timespan from the row.
Expand Down
29 changes: 29 additions & 0 deletions python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,35 @@ def test_general_query(self) -> None:
{Timespan(t1, t2), Timespan(t2, t3), Timespan(t3, None), Timespan.makeEmpty(), None},
)

def test_query_ingest_date(self) -> None:
"""Test general query returning ingest_date field."""
before_ingest = astropy.time.Time.now()
butler = self.make_butler("base.yaml", "datasets.yaml")
dimensions = DimensionGroup(butler.dimensions, ["detector", "physical_filter"])

# Check that returned type of ingest_date is astropy Time, must work
# for schema versions 1 and 2 of datasets manager.
with butler._query() as query:
query = query.join_dataset_search("flat", "imported_g")
rows = list(query.general(dimensions, dataset_fields={"flat": ...}))
self.assertEqual(len(rows), 3)
for row in rows:
self.assertIsInstance(row["flat.ingest_date"], astropy.time.Time)

# Check that WHERE accepts astropy time
with butler._query() as query:
query = query.join_dataset_search("flat", "imported_g")
query1 = query.where("flat.ingest_date < before_ingest", bind={"before_ingest": before_ingest})
rows = list(query1.general(dimensions))
self.assertEqual(len(rows), 0)
query1 = query.where("flat.ingest_date >= before_ingest", bind={"before_ingest": before_ingest})
rows = list(query1.general(dimensions))
self.assertEqual(len(rows), 3)
# Same with a time in string literal.
query1 = query.where(f"flat.ingest_date < T'mjd/{before_ingest.tai.mjd}'")
rows = list(query1.general(dimensions))
self.assertEqual(len(rows), 0)

def test_implied_union_record_query(self) -> None:
"""Test queries for a dimension ('band') that uses "implied union"
storage, in which its values are the union of the values for it in a
Expand Down

0 comments on commit 028a2be

Please sign in to comment.