From 5af8e22b7b25a2051e20a13aaad9b01b4229c8fc Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 23 Jan 2025 15:24:24 -0700 Subject: [PATCH] Add simple provenance to FITS headers This adds "LSST BUTLER" headers to reflect the DatasetRef associated with this file. --- python/lsst/obs/base/exposureAssembler.py | 9 +++- .../lsst/obs/base/formatters/fitsExposure.py | 41 +++++++++++++++++-- .../lsst/obs/base/formatters/fitsGeneric.py | 12 ++++++ tests/test_butlerFits.py | 4 ++ 4 files changed, 62 insertions(+), 4 deletions(-) diff --git a/python/lsst/obs/base/exposureAssembler.py b/python/lsst/obs/base/exposureAssembler.py index 0d603da5..d2786fba 100644 --- a/python/lsst/obs/base/exposureAssembler.py +++ b/python/lsst/obs/base/exposureAssembler.py @@ -28,7 +28,9 @@ # Need to enable PSFs to be instantiated import lsst.afw.detection from lsst.afw.image import Exposure, makeExposure, makeMaskedImage -from lsst.daf.butler import DatasetComponent, StorageClassDelegate +from lsst.daf.butler import DatasetComponent, DatasetRef, StorageClassDelegate + +from .formatters.fitsExposure import add_provenance_to_fits_header log = logging.getLogger(__name__) @@ -319,3 +321,8 @@ def selectResponsibleComponent(cls, readComponent: str, fromComponents: set[str if c in fromComponents: return c raise ValueError(f"Can not calculate read component {readComponent} from {fromComponents}") + + def add_provenance(self, inMemoryDataset: Any, ref: DatasetRef) -> Any: + # Add provenance via FITS headers. 
+ add_provenance_to_fits_header(inMemoryDataset.metadata, ref) + return inMemoryDataset diff --git a/python/lsst/obs/base/formatters/fitsExposure.py b/python/lsst/obs/base/formatters/fitsExposure.py index 55862fa8..509c62c7 100644 --- a/python/lsst/obs/base/formatters/fitsExposure.py +++ b/python/lsst/obs/base/formatters/fitsExposure.py @@ -29,7 +29,7 @@ import warnings from abc import abstractmethod -from collections.abc import Set +from collections.abc import MutableMapping, Set from typing import Any, ClassVar from lsst.afw.cameraGeom import AmplifierGeometryComparison, AmplifierIsolator @@ -44,13 +44,43 @@ # Needed for ApCorrMap to resolve properly from lsst.afw.math import BoundedField # noqa: F401 -from lsst.daf.base import PropertySet -from lsst.daf.butler import FormatterV2 +from lsst.daf.base import PropertyList, PropertySet +from lsst.daf.butler import DatasetRef, FormatterV2 from lsst.resources import ResourcePath from lsst.utils.classes import cached_getter from lsst.utils.introspection import find_outside_stacklevel +def add_provenance_to_fits_header(hdr: PropertyList | MutableMapping, ref: DatasetRef) -> None: + """Modify the given header to include provenance headers. + + Parameters + ---------- + hdr : `lsst.daf.base.PropertyList` or `collections.abc.MutableMapping` + The FITS header to modify. Assumes ``HIERARCH`` will be handled + implicitly by the writer. + ref : `lsst.daf.butler.DatasetRef` + The butler dataset associated with this FITS file. + """ + # Use property list here so that we have the option of including comments. 
+ extras = PropertyList() + hierarch = "LSST BUTLER" + extras.set(f"{hierarch} ID", str(ref.id), comment="Dataset ID") + extras.set(f"{hierarch} RUN", ref.run, comment="Run collection") + extras.set(f"{hierarch} DATASETTYPE", ref.datasetType.name, comment="Dataset type") + for k, v in sorted(ref.dataId.required.items()): + extras.set(f"{hierarch} DATAID {k.upper()}", v, comment="Data identifier") + + # Purge old headers (stale data ID keys; avoid accumulation in a PropertyList). + # Iterate over a snapshot of the keys because entries are deleted in the loop. + for k in list(hdr): + if k.startswith(hierarch): + del hdr[k] + + # Update the header. + hdr.update(extras) + + class FitsImageFormatterBase(FormatterV2): """Base class formatter for image-like storage classes stored via FITS. @@ -516,6 +546,11 @@ class FitsExposureFormatter(FitsMaskedImageFormatter): ReaderClass = ExposureFitsReader + def add_provenance(self, in_memory_dataset: Any) -> Any: + # Add provenance via FITS headers. + add_provenance_to_fits_header(in_memory_dataset.metadata, self.dataset_ref) + return in_memory_dataset + def readComponent(self, component): # Docstring inherited.
# Generic components can be read via a string name; DM-27754 will make diff --git a/python/lsst/obs/base/formatters/fitsGeneric.py b/python/lsst/obs/base/formatters/fitsGeneric.py index d706966b..c4cecb69 100644 --- a/python/lsst/obs/base/formatters/fitsGeneric.py +++ b/python/lsst/obs/base/formatters/fitsGeneric.py @@ -26,6 +26,8 @@ from lsst.daf.butler import FormatterV2 from lsst.resources import ResourcePath +from .fitsExposure import add_provenance_to_fits_header + class FitsGenericFormatter(FormatterV2): """Interface for reading and writing objects that support the standard @@ -50,3 +52,13 @@ def read_from_local_file(self, path: str, component: str | None = None, expected def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None: in_memory_dataset.writeFits(uri.ospath) + + def add_provenance(self, in_memory_dataset: Any) -> Any: + if hasattr(in_memory_dataset, "getMetadata"): + add_provenance_to_fits_header(in_memory_dataset.getMetadata(), self.dataset_ref) + elif hasattr(in_memory_dataset, "metadata"): + add_provenance_to_fits_header(in_memory_dataset.metadata, self.dataset_ref) + else: + # Unable to find compliant metadata attribute. + pass + return in_memory_dataset diff --git a/tests/test_butlerFits.py b/tests/test_butlerFits.py index 9b1c4b3c..08939b5a 100644 --- a/tests/test_butlerFits.py +++ b/tests/test_butlerFits.py @@ -223,6 +223,7 @@ def testFitsCatalog(self) -> None: ref = self.butler.put(catalog, "testCatalog", dataId) stored = self.butler.get(ref) self.assertCatalogEqual(catalog, stored) + self.assertEqual(stored.metadata["LSST BUTLER ID"], str(ref.id)) # Override the storage class. astropy_table = self.butler.get(ref, storageClass="AstropyTable") @@ -256,6 +257,9 @@ def runExposureCompositePutGetTest(self, datasetTypeName: str) -> DatasetRef: # Get the full thing composite = self.butler.get(datasetTypeName, dataId) + # Check that provenance has been written.
+ self.assertEqual(composite.metadata["LSST BUTLER ID"], str(ref.id)) + # There is no assert for Exposure so just look at maskedImage self.assertMaskedImagesEqual(composite.maskedImage, exposure.maskedImage)