Skip to content

Commit

Permalink
Add migration script for universe 7
Browse files Browse the repository at this point in the history
  • Loading branch information
dhirving committed Apr 8, 2024
1 parent 0b9fa9e commit 9e38d70
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 12 deletions.
9 changes: 8 additions & 1 deletion doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,11 @@ Supports group and day_obs as dimensions.
- Rename ``group_name`` in the exposure table to ``group``.
- Update the ``exposure`` table so ``group`` and ``day_obs`` are foreign keys to the new tables.
- Remove ``group_id`` from ``exposure`` table.
- Update ``config:dimensions.json`` to universe 6.

daf_butler 6 to 7
=================

Migration script: `352c30854bb0.py <https://github.com/lsst-dm/daf_butler_migrate/blob/main/migrations/dimensions-config/352c30854bb0.py>`_

Adds ``can_see_sky`` column to the ``exposure`` table.
127 changes: 127 additions & 0 deletions migrations/dimensions-config/352c30854bb0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Migration script for dimensions.yaml namespace=daf_butler version=7.

Revision ID: 352c30854bb0
Revises: 1fae088c80b6
Create Date: 2024-03-28 12:14:53.021101
"""

import logging

import sqlalchemy
from alembic import op
from lsst.daf.butler_migrate.migration_context import MigrationContext

# revision identifiers, used by Alembic.
revision = "352c30854bb0"
down_revision = "1fae088c80b6"
branch_labels = None
depends_on = None

# Logger name should start with lsst to work with butler logging option.
_LOG = logging.getLogger(f"lsst.{__name__}")

# Boolean column added to the exposure table by this migration.
_NEW_COLUMN = "can_see_sky"
# Table being modified.
_TABLE = "exposure"
# Existing column used to derive initial values for the new column.
_OBSERVATION_TYPE_COLUMN = "observation_type"


def upgrade() -> None:
    """Upgrade from daf_butler universe version 6 to version 7 following update
    of dimensions.yaml in DM-43101.

    Adds ``can_see_sky`` column to the exposure table, and sets its initial
    values based on the ``observation_type`` column.
    """

    ctx = MigrationContext()

    _LOG.info("Checking that this is an unmodified daf_butler universe 6 repo")
    ctx.attributes.validate_dimensions_json(6)

    _LOG.info("Adding can_see_sky column to exposure table")
    # Nullable because existing rows have no value yet; known observation
    # types are backfilled immediately below.
    op.add_column(
        _TABLE, sqlalchemy.Column(_NEW_COLUMN, sqlalchemy.Boolean, nullable=True), schema=ctx.schema
    )

    # Set values for existing data based on the exposure's observation_type,
    # which is closely correlated with whether the sky is visible in the
    # exposure.
    #
    # Any exposures with observation types not present in the two calls to
    # _populate_values below will be left as NULL.
    _LOG.info("Populating can_see_sky column")
    table = ctx.get_table(_TABLE)
    _populate_values(
        table,
        True,
        [
            "science",
            "object",
            "standard",
            "sky flat",
            "standard_star",
            "skyflat",
            "focus",
            "focusing",
            "exp",
            "skyexp",
        ],
    )
    _populate_values(table, False, ["dark", "bias", "agexp", "domeflat", "dome flat", "zero", "spot"])

    unhandled_observation_types = _find_unhandled_observation_types(ctx)
    if unhandled_observation_types:
        # Logged at warning level (rather than info with a "WARNING:" prefix)
        # so it is surfaced by standard logging configuration; lazy %-args
        # avoid formatting when the message is filtered out.
        _LOG.warning(
            "No default value for can_see_sky is known for the following observation types:\n"
            "%s\n"
            "Exposure records with these observation types will have a NULL can_see_sky.",
            unhandled_observation_types,
        )
    else:
        _LOG.info("...can_see_sky values were set for all exposure records.")

    _LOG.info("Updating dimensions.json in ButlerAttributes")
    ctx.attributes.replace_dimensions_json(7)


def downgrade() -> None:
    """Revert the schema from daf_butler universe 7 back to universe 6."""
    context = MigrationContext()

    # Refuse to touch anything that is not exactly an unmodified universe 7.
    _LOG.info("Checking that this is an unmodified daf_butler universe 7 repo")
    context.attributes.validate_dimensions_json(7)

    # Reversing the upgrade simply discards the new column and its data.
    _LOG.info("dropping can_see_sky column")
    op.drop_column(_TABLE, _NEW_COLUMN, schema=context.schema)

    _LOG.info("Updating dimensions.json in ButlerAttributes")
    context.attributes.replace_dimensions_json(6)


def _populate_values(table: sqlalchemy.Table, can_see_sky: bool, observation_types: list[str]) -> None:
    """Assign ``can_see_sky`` for exposure rows with a matching observation
    type.

    Parameters
    ----------
    table : `sqlalchemy.Table`
        The exposure table.
    can_see_sky : `bool`
        Value to store in the ``can_see_sky`` column.
    observation_types : `list` [ `str` ]
        Observation types whose rows should receive this value.
    """
    observation_type = table.columns[_OBSERVATION_TYPE_COLUMN]
    target_column = table.columns[_NEW_COLUMN]
    # Only touch rows that have not already been assigned a value.
    statement = (
        table.update()
        .where(observation_type.in_(observation_types))
        .where(target_column.is_(None))
        .values({_NEW_COLUMN: can_see_sky})
    )
    op.execute(statement)


def _find_unhandled_observation_types(ctx: MigrationContext) -> list[str]:
    """Return the distinct observation types present in the exposure table
    that still have a NULL ``can_see_sky`` value in at least one of their
    rows.

    Parameters
    ----------
    ctx : `MigrationContext`
        Context for the database being migrated.

    Returns
    -------
    observation_types : `list` [ `str` ]
        Observation types with at least one unpopulated row.
    """
    exposure = ctx.get_table(_TABLE)
    query = (
        sqlalchemy.select(exposure.columns[_OBSERVATION_TYPE_COLUMN])
        .distinct()
        .where(exposure.columns[_NEW_COLUMN].is_(None))
    )
    result = ctx.bind.execute(query)
    return list(result.scalars().all())
41 changes: 30 additions & 11 deletions python/lsst/daf/butler_migrate/migration_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,48 @@

from __future__ import annotations

__all__ = ("MigrationContext",)


import alembic
import sqlalchemy as sa
import sqlalchemy

from .butler_attributes import ButlerAttributes

__all__ = ("MigrationContext",)


class MigrationContext:
    """Provides access to commonly-needed objects derived from the alembic
    migration context.
    """

    def __init__(self) -> None:
        # NOTE: the rendered diff interleaved the pre-change assignments with
        # their replacements; this is the reconciled post-change version with
        # each attribute assigned exactly once.
        self.mig_context = (
            alembic.context.get_context()
        )  #: Alembic migration context for the DB being migrated.
        self.schema = (
            self.mig_context.version_table_schema
        )  #: Database schema name for the repository being migrated.
        bind = self.mig_context.bind
        # Data migrations need a live connection; offline (SQL-emitting) mode
        # has no bind, so fail loudly rather than misbehave later.
        assert bind is not None, "Can't run offline -- need access to database to migrate data."
        self.bind = bind  #: A SQLAlchemy connection for the database being migrated.
        self.dialect = self.bind.dialect.name  #: SQLAlchemy dialect for the database being migrated.
        self.is_sqlite = self.dialect == "sqlite"  #: True if the database being migrated is SQLite.
        self.metadata = sqlalchemy.schema.MetaData(
            schema=self.schema
        )  #: SQLAlchemy MetaData object for the DB being migrated.
        self.attributes = ButlerAttributes(self.bind, self.schema)

    def get_table(self, table_name: str) -> sqlalchemy.Table:
        """Create a SQLAlchemy table object for the current database.

        Parameters
        ----------
        table_name : `str`
            Name of the table.

        Returns
        -------
        table : ``sqlalchemy.Table``
            Table object.
        """
        # Reflect the table definition directly from the live database so the
        # migration sees the schema as it currently exists.
        return sqlalchemy.schema.Table(table_name, self.metadata, autoload_with=self.bind, schema=self.schema)

0 comments on commit 9e38d70

Please sign in to comment.