From 9e38d70907852d0f8c461a0e6e7b255f0e77aff4 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 28 Mar 2024 14:38:57 -0700 Subject: [PATCH] Add migration script for universe 7 --- .../migrations/dimensions-config.rst | 9 +- migrations/dimensions-config/352c30854bb0.py | 127 ++++++++++++++++++ .../daf/butler_migrate/migration_context.py | 41 ++++-- 3 files changed, 165 insertions(+), 12 deletions(-) create mode 100644 migrations/dimensions-config/352c30854bb0.py diff --git a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst index 779e08a..f608c07 100644 --- a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst +++ b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst @@ -85,4 +85,11 @@ Supports group and day_obs as dimensions. - Rename ``group_name`` in the exposure table to ``group``. - Update the ``exposure`` table so ``group`` and ``day_obs`` are foreign keys to the new tables. - Remove ``group_id`` from ``exposure`` table. -- Update ``config:dimensions.json`` to universe 6. \ No newline at end of file +- Update ``config:dimensions.json`` to universe 6. + +daf_butler 6 to 7 +================= + +Migration script: `352c30854bb0.py `_ + +Adds ``can_see_sky`` column to the ``exposure`` table. \ No newline at end of file diff --git a/migrations/dimensions-config/352c30854bb0.py b/migrations/dimensions-config/352c30854bb0.py new file mode 100644 index 0000000..ed366c0 --- /dev/null +++ b/migrations/dimensions-config/352c30854bb0.py @@ -0,0 +1,127 @@ +"""Migration script for dimensions.yaml namespace=daf_butler version=7. + +Revision ID: 352c30854bb0 +Revises: 1fae088c80b6 +Create Date: 2024-03-28 12:14:53.021101 + +""" + +import logging + +import sqlalchemy +from alembic import op +from lsst.daf.butler_migrate.migration_context import MigrationContext + +# revision identifiers, used by Alembic. +revision = "352c30854bb0" +down_revision = "1fae088c80b6" +branch_labels = None +depends_on = None + +# Logger name should start with lsst to work with butler logging option. +_LOG = logging.getLogger(f"lsst.{__name__}") + +_NEW_COLUMN = "can_see_sky" +_TABLE = "exposure" +_OBSERVATION_TYPE_COLUMN = "observation_type" + + +def upgrade() -> None: + """Upgrade from daf_butler universe version 6 to version 7 following update + of dimensions.yaml in DM-43101. + + Adds ``can_see_sky`` column to the exposure table, and sets its initial + values based on the the ``observation_type`` column. + """ + + ctx = MigrationContext() + + _LOG.info("Checking that this is an unmodified daf_butler universe 6 repo") + ctx.attributes.validate_dimensions_json(6) + + _LOG.info("Adding can_see_sky column to exposure table") + op.add_column( + _TABLE, sqlalchemy.Column(_NEW_COLUMN, sqlalchemy.Boolean, nullable=True), schema=ctx.schema + ) + + # Set values for existing data based on the exposure's observation_type, + # which is closely correlated with whether the sky is visible in the + # exposure. + # + # Any exposures with observation types not present not in the two calls to + # _populate_values below will be left as NULL. + _LOG.info("Populating can_see_sky column") + table = ctx.get_table(_TABLE) + _populate_values( + table, + True, + [ + "science", + "object", + "standard", + "sky flat", + "standard_star", + "skyflat", + "focus", + "focusing", + "exp", + "skyexp", + ], + ) + _populate_values(table, False, ["dark", "bias", "agexp", "domeflat", "dome flat", "zero", "spot"]) + + unhandled_observation_types = _find_unhandled_observation_types(ctx) + if unhandled_observation_types: + _LOG.info( + "WARNING: No default value for can_see_sky is known for the following observation types:\n" + f"{unhandled_observation_types}\n" + "Exposure records with these observation types will have a NULL can_see_sky." + ) + else: + _LOG.info("...can_see_sky values were set for all exposure records.") + + _LOG.info("Updating dimensions.json in ButlerAttributes") + ctx.attributes.replace_dimensions_json(7) + + +def downgrade() -> None: + """Perform schema downgrade.""" + ctx = MigrationContext() + + _LOG.info("Checking that this is an unmodified daf_butler universe 7 repo") + ctx.attributes.validate_dimensions_json(7) + + _LOG.info("dropping can_see_sky column") + op.drop_column(_TABLE, _NEW_COLUMN, schema=ctx.schema) + + _LOG.info("Updating dimensions.json in ButlerAttributes") + ctx.attributes.replace_dimensions_json(6) + + +def _populate_values(table: sqlalchemy.Table, can_see_sky: bool, observation_types: list[str]) -> None: + """Set can_see_sky column to the specified value for all rows in the + exposure table that have an observation_type in the specified list. + """ + op.execute( + table.update() + .values({_NEW_COLUMN: can_see_sky}) + .where(table.columns[_OBSERVATION_TYPE_COLUMN].in_(observation_types)) + .where(table.columns[_NEW_COLUMN].is_(None)) + ) + + +def _find_unhandled_observation_types(ctx: MigrationContext) -> list[str]: + """Return a list of observation types present in the exposure table that + have a NULL value for the ``can_see_sky`` column at least one of their + rows. + """ + table = ctx.get_table(_TABLE) + return list( + ctx.bind.execute( + sqlalchemy.select(table.columns[_OBSERVATION_TYPE_COLUMN]) + .distinct() + .where(table.columns[_NEW_COLUMN].is_(None)) + ) + .scalars() + .all() + ) diff --git a/python/lsst/daf/butler_migrate/migration_context.py b/python/lsst/daf/butler_migrate/migration_context.py index 39a3ef2..57f23ad 100644 --- a/python/lsst/daf/butler_migrate/migration_context.py +++ b/python/lsst/daf/butler_migrate/migration_context.py @@ -21,13 +21,14 @@ from __future__ import annotations +__all__ = ("MigrationContext",) + + import alembic -import sqlalchemy as sa +import sqlalchemy from .butler_attributes import ButlerAttributes -__all__ = ("MigrationContext",) - class MigrationContext: """Provides access to commonly-needed objects derived from the alembic @@ -35,15 +36,33 @@ class MigrationContext: """ def __init__(self) -> None: - self.mig_context = alembic.context.get_context() - self.schema = self.mig_context.version_table_schema + self.mig_context = ( + alembic.context.get_context() + ) #: Alembic migration context for the DB being migrated. + self.schema = ( + self.mig_context.version_table_schema + ) #: Database schema name for the repository being migrated. bind = self.mig_context.bind assert bind is not None, "Can't run offline -- need access to database to migrate data." - self.bind = bind - self.dialect = self.bind.dialect.name - self.is_sqlite = self.dialect == "sqlite" - self.metadata = sa.schema.MetaData(schema=self.schema) + self.bind = bind #: A SQLAlchemy connection for the database being migrated. + self.dialect = self.bind.dialect.name #: SQLAlchemy dialect for the database being migrated. + self.is_sqlite = self.dialect == "sqlite" #: True if the database being migrated is SQLite. + self.metadata = sqlalchemy.schema.MetaData( + schema=self.schema + ) # SQLAlchemy MetaData object for the DB being migrated. self.attributes = ButlerAttributes(self.bind, self.schema) - def get_table(self, table_name: str) -> sa.Table: - return sa.schema.Table(table_name, self.metadata, autoload_with=self.bind, schema=self.schema) + def get_table(self, table_name: str) -> sqlalchemy.Table: + """Create a SQLAlchemy table object for the current database. + + Parameters + ---------- + table_name : `str` + Name of the table. + + Returns + ------- + table : ``sqlalchemy.Table`` + Table object. + """ + return sqlalchemy.schema.Table(table_name, self.metadata, autoload_with=self.bind, schema=self.schema)