DM-45764: Return ingest_date as astropy time from general query #1060

Merged Aug 19, 2024 (2 commits)
35 changes: 1 addition & 34 deletions python/lsst/daf/butler/column_spec.py
@@ -41,7 +41,6 @@
    "COLLECTION_NAME_MAX_LENGTH",
)

import datetime
import textwrap
import uuid
from abc import ABC, abstractmethod
@@ -153,38 +152,6 @@ def deserialize(self, value: Any) -> Any:
        return value if value is None else self._type_adapter.validate_python(value)


class _DateTimeColumnValueSerializer(ColumnValueSerializer):
    """Implementation of serializer for the ingest_time column.

    That column can be stored either as native database time, appearing as
    `datetime.datetime` on the Python side, or as integer nanoseconds,
    appearing as `astropy.time.Time`. We use a pydantic type adapter for
    astropy time, which serializes it into integer nanoseconds; `datetime` is
    converted to its string representation to distinguish it from integer
    nanoseconds (timezone handling depends entirely on what the database
    returns).
    """

    def __init__(self) -> None:
        self._astropy_adapter = pydantic.TypeAdapter(SerializableTime)

    def serialize(self, value: Any) -> Any:
        # Docstring inherited.
        if value is None:
            return None
        elif isinstance(value, datetime.datetime):
            return value.isoformat()
        else:
            return self._astropy_adapter.dump_python(value)

    def deserialize(self, value: Any) -> Any:
        # Docstring inherited.
        if value is None:
            return None
        elif isinstance(value, str):
            return datetime.datetime.fromisoformat(value)
        else:
            return self._astropy_adapter.validate_python(value)
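
# A minimal, self-contained sketch (not part of the diff) of the dual-format
# behaviour described in the docstring above: datetime values round-trip as
# ISO strings while astropy times round-trip as integer nanoseconds. The real
# code delegates the nanosecond handling to a pydantic TypeAdapter for
# SerializableTime; the epoch choice and helper names below are illustrative
# assumptions only.
import datetime

import astropy.time

_EPOCH = astropy.time.Time("1970-01-01T00:00:00", scale="tai", format="isot")


def _sketch_serialize(value: datetime.datetime | astropy.time.Time | None) -> str | int | None:
    if value is None:
        return None
    if isinstance(value, datetime.datetime):
        return value.isoformat()
    # Nanoseconds since the (assumed) TAI epoch; an integer, so it can never
    # be confused with the ISO string produced for datetime values.
    return int(round((value.tai - _EPOCH).to_value("sec") * 1e9))


def _sketch_deserialize(value: str | int | None) -> datetime.datetime | astropy.time.Time | None:
    if value is None:
        return None
    if isinstance(value, str):
        return datetime.datetime.fromisoformat(value)
    return _EPOCH + astropy.time.TimeDelta(value * 1e-9, format="sec")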


class _BaseColumnSpec(pydantic.BaseModel, ABC):
"""Base class for descriptions of table columns."""

@@ -459,7 +426,7 @@ def to_arrow(self) -> arrow_utils.ToArrow:

    def serializer(self) -> ColumnValueSerializer:
        # Docstring inherited.
        return _DateTimeColumnValueSerializer()
        return _TypeAdapterColumnValueSerializer(pydantic.TypeAdapter(SerializableTime))
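
# A hedged sketch (not part of the diff) of the pattern the new return line
# relies on: a serializer that delegates both directions to a pydantic
# TypeAdapter. The real _TypeAdapterColumnValueSerializer is defined earlier
# in this module; this stand-alone version only illustrates the idea.
from typing import Any

import pydantic


class _SketchTypeAdapterSerializer:
    def __init__(self, type_adapter: pydantic.TypeAdapter) -> None:
        self._type_adapter = type_adapter

    def serialize(self, value: Any) -> Any:
        # None passes through; everything else is dumped by the adapter.
        return None if value is None else self._type_adapter.dump_python(value)

    def deserialize(self, value: Any) -> Any:
        # Mirrors the context line shown in the first hunk above.
        return None if value is None else self._type_adapter.validate_python(value)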


ColumnSpec = Annotated[
Changes to a second file (file name not shown):
@@ -27,11 +27,13 @@

from __future__ import annotations

import datetime
from abc import abstractmethod
from collections.abc import Iterable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import astropy.time
import sqlalchemy

from .._dataset_ref import DatasetRef
@@ -331,6 +333,8 @@ def __init__(self, spec: GeneralResultSpec, ctx: ResultPageConverterContext) ->
            column_name = qt.ColumnSet.get_qualified_name(column.logical_table, column.field)
            if column.field == TimespanDatabaseRepresentation.NAME:
                self.converters.append(_TimespanGeneralColumnConverter(column_name, ctx.db))
            elif column.field == "ingest_date":
                self.converters.append(_TimestampGeneralColumnConverter(column_name))
            else:
                self.converters.append(_DefaultGeneralColumnConverter(column_name))

@@ -377,6 +381,29 @@ def convert(self, row: sqlalchemy.Row) -> Any:
        return row._mapping[self.name]


class _TimestampGeneralColumnConverter(_GeneralColumnConverter):
    """Converter that transforms ``datetime`` instances into astropy Time.

    Only the ``dataset.ingest_date`` column used native timestamps in the
    initial schema version, and we are switching to our common
    nanoseconds-since-epoch representation for that column in newer schema
    versions. For both schema versions we want to return astropy time to
    clients.

    Parameters
    ----------
    name : `str`
        Column name.
    """

    def __init__(self, name: str):
        self.name = name

    def convert(self, row: sqlalchemy.Row) -> Any:
        value = row._mapping[self.name]
        if isinstance(value, datetime.datetime):
            value = astropy.time.Time(value, scale="utc").tai
        return value
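
# Illustrative only (not part of the diff): a naive datetime read back from a
# database using the old schema is interpreted as UTC and converted to TAI,
# so clients see an astropy Time regardless of schema version.
import datetime

import astropy.time

dt = datetime.datetime(2024, 8, 19, 12, 0, 0)
converted = astropy.time.Time(dt, scale="utc").tai
assert isinstance(converted, astropy.time.Time)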


class _TimespanGeneralColumnConverter(_GeneralColumnConverter):
"""Converter that extracts timespan from the row.

29 changes: 29 additions & 0 deletions python/lsst/daf/butler/tests/butler_queries.py
@@ -381,6 +381,35 @@ def test_general_query(self) -> None:
            {Timespan(t1, t2), Timespan(t2, t3), Timespan(t3, None), Timespan.makeEmpty(), None},
)

    def test_query_ingest_date(self) -> None:
        """Test general query returning ingest_date field."""
        before_ingest = astropy.time.Time.now()
        butler = self.make_butler("base.yaml", "datasets.yaml")
        dimensions = DimensionGroup(butler.dimensions, ["detector", "physical_filter"])

        # Check that the returned type of ingest_date is astropy Time; this
        # must work for schema versions 1 and 2 of the datasets manager.
        with butler._query() as query:
            query = query.join_dataset_search("flat", "imported_g")
            rows = list(query.general(dimensions, dataset_fields={"flat": ...}))
            self.assertEqual(len(rows), 3)
            for row in rows:
                self.assertIsInstance(row["flat.ingest_date"], astropy.time.Time)

        # Check that WHERE accepts astropy time.
        with butler._query() as query:
            query = query.join_dataset_search("flat", "imported_g")
            query1 = query.where("flat.ingest_date < before_ingest", bind={"before_ingest": before_ingest})
            rows = list(query1.general(dimensions))
            self.assertEqual(len(rows), 0)
            query1 = query.where("flat.ingest_date >= before_ingest", bind={"before_ingest": before_ingest})
            rows = list(query1.general(dimensions))
            self.assertEqual(len(rows), 3)
            # Same with a time in a string literal.
            query1 = query.where(f"flat.ingest_date < T'mjd/{before_ingest.tai.mjd}'")
            rows = list(query1.general(dimensions))
            self.assertEqual(len(rows), 0)
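
# A standalone recap (not part of the diff) of the two ways the test above
# constrains ingest_date: binding an astropy Time directly, or embedding it
# as a T'mjd/...' literal inside the query string.
import astropy.time

cutoff = astropy.time.Time.now()
bound_expr = "flat.ingest_date < before_ingest"  # used with bind={"before_ingest": cutoff}
literal_expr = f"flat.ingest_date < T'mjd/{cutoff.tai.mjd}'"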

    def test_implied_union_record_query(self) -> None:
        """Test queries for a dimension ('band') that uses "implied union"
        storage, in which its values are the union of the values for it in a