Skip to content

Commit

Permalink
Return ingest_date as astropy time from general query (DM-45764)
Browse files Browse the repository at this point in the history
There is no easy way to catch/convert ingest_date in the rows returned
from all queries. For now only the general query can return ingest_date,
so it is an easy fix for that case, though it requires checking the type
of the value in that column for each row.
  • Loading branch information
andy-slac committed Aug 16, 2024
1 parent 41ffdbb commit 26900bf
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 34 deletions.
35 changes: 1 addition & 34 deletions python/lsst/daf/butler/column_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
"COLLECTION_NAME_MAX_LENGTH",
)

import datetime
import textwrap
import uuid
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -153,38 +152,6 @@ def deserialize(self, value: Any) -> Any:
return value if value is None else self._type_adapter.validate_python(value)


class _DateTimeColumnValueSerializer(ColumnValueSerializer):
    """Serializer for the ``ingest_time`` column.

    Depending on schema version, the value arrives either as a native
    database timestamp (`datetime.datetime`) or as integer nanoseconds
    presented as `astropy.time.Time`.  Astropy times go through a pydantic
    type adapter that serializes them to integer nanoseconds; datetimes are
    round-tripped via their ISO-8601 string form so the two encodings cannot
    be confused (timezone handling depends entirely on what the database
    returns).
    """

    def __init__(self) -> None:
        # Adapter handles astropy Time <-> integer-nanoseconds conversion.
        self._astropy_adapter = pydantic.TypeAdapter(SerializableTime)

    def serialize(self, value: Any) -> Any:
        # Docstring inherited.
        if value is None:
            return None
        if isinstance(value, datetime.datetime):
            # String form distinguishes datetimes from integer nanoseconds.
            return value.isoformat()
        return self._astropy_adapter.dump_python(value)

    def deserialize(self, value: Any) -> Any:
        # Docstring inherited.
        if value is None:
            return None
        if isinstance(value, str):
            return datetime.datetime.fromisoformat(value)
        return self._astropy_adapter.validate_python(value)


class _BaseColumnSpec(pydantic.BaseModel, ABC):
"""Base class for descriptions of table columns."""

Expand Down Expand Up @@ -459,7 +426,7 @@ def to_arrow(self) -> arrow_utils.ToArrow:

def serializer(self) -> ColumnValueSerializer:
    # Docstring inherited.
    adapter = pydantic.TypeAdapter(SerializableTime)
    return _TypeAdapterColumnValueSerializer(adapter)


ColumnSpec = Annotated[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@

from __future__ import annotations

import datetime
from abc import abstractmethod
from collections.abc import Iterable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any

import astropy.time
import sqlalchemy

from .._dataset_ref import DatasetRef
Expand Down Expand Up @@ -331,6 +333,8 @@ def __init__(self, spec: GeneralResultSpec, ctx: ResultPageConverterContext) ->
column_name = qt.ColumnSet.get_qualified_name(column.logical_table, column.field)
if column.field == TimespanDatabaseRepresentation.NAME:
self.converters.append(_TimespanGeneralColumnConverter(column_name, ctx.db))
elif column.field == "ingest_date":
self.converters.append(_TimestampGeneralColumnConverter(column_name))
else:
self.converters.append(_DefaultGeneralColumnConverter(column_name))

Expand Down Expand Up @@ -377,6 +381,29 @@ def convert(self, row: sqlalchemy.Row) -> Any:
return row._mapping[self.name]


class _TimestampGeneralColumnConverter(_GeneralColumnConverter):
    """Converter that normalizes ``datetime`` values to `astropy.time.Time`.

    Only the ``dataset.ingest_date`` column used native timestamps in the
    initial schema version; newer schema versions switch to our common
    nanoseconds-since-epoch representation for that column.  For both schema
    versions clients should receive an astropy time.

    Parameters
    ----------
    name : `str`
        Column name.
    """

    def __init__(self, name: str):
        self.name = name

    def convert(self, row: sqlalchemy.Row) -> Any:
        value = row._mapping[self.name]
        if not isinstance(value, datetime.datetime):
            # Already in the common representation; pass through unchanged.
            return value
        # Native timestamps are interpreted as UTC, then moved to TAI scale.
        return astropy.time.Time(value, scale="utc").tai


class _TimespanGeneralColumnConverter(_GeneralColumnConverter):
"""Converter that extracts timespan from the row.
Expand Down
14 changes: 14 additions & 0 deletions python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,20 @@ def test_general_query(self) -> None:
{Timespan(t1, t2), Timespan(t2, t3), Timespan(t3, None), Timespan.makeEmpty(), None},
)

def test_query_ingest_date(self) -> None:
    """Test that a general query returns ingest_date as astropy Time."""
    butler = self.make_butler("base.yaml", "datasets.yaml")
    dimensions = DimensionGroup(butler.dimensions, ["detector", "physical_filter"])

    # The returned type of ingest_date must be astropy Time; this must hold
    # for both schema versions (1 and 2) of the datasets manager.
    with butler._query() as query:
        query = query.join_dataset_search("flat", "imported_g")
        result_rows = list(query.general(dimensions, dataset_fields={"flat": ...}))
        self.assertEqual(len(result_rows), 3)
        for result_row in result_rows:
            self.assertIsInstance(result_row["flat.ingest_date"], astropy.time.Time)

def test_implied_union_record_query(self) -> None:
"""Test queries for a dimension ('band') that uses "implied union"
storage, in which its values are the union of the values for it in a
Expand Down

0 comments on commit 26900bf

Please sign in to comment.