Skip to content

Commit

Permalink
Add migration script for universe 7
Browse files Browse the repository at this point in the history
  • Loading branch information
dhirving committed Apr 8, 2024
1 parent 0b9fa9e commit 9e38d70
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 12 deletions.
9 changes: 8 additions & 1 deletion doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,11 @@ Supports group and day_obs as dimensions.
- Rename ``group_name`` in the exposure table to ``group``.
- Update the ``exposure`` table so ``group`` and ``day_obs`` are foreign keys to the new tables.
- Remove ``group_id`` from ``exposure`` table.
- Update ``config:dimensions.json`` to universe 6.

daf_butler 6 to 7
=================

Migration script: `352c30854bb0.py <https://github.com/lsst-dm/daf_butler_migrate/blob/main/migrations/dimensions-config/352c30854bb0.py>`_

Adds ``can_see_sky`` column to the ``exposure`` table.
127 changes: 127 additions & 0 deletions migrations/dimensions-config/352c30854bb0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Migration script for dimensions.yaml namespace=daf_butler version=7.

Revision ID: 352c30854bb0
Revises: 1fae088c80b6
Create Date: 2024-03-28 12:14:53.021101
"""

import logging

import sqlalchemy
from alembic import op
from lsst.daf.butler_migrate.migration_context import MigrationContext

# revision identifiers, used by Alembic.
revision = "352c30854bb0"
down_revision = "1fae088c80b6"
branch_labels = None
depends_on = None

# Logger name should start with lsst to work with butler logging option.
_LOG = logging.getLogger(f"lsst.{__name__}")

# Boolean column added to the exposure table by this migration.
_NEW_COLUMN = "can_see_sky"
# Table being modified.
_TABLE = "exposure"
# Existing column used to derive initial values for the new column.
_OBSERVATION_TYPE_COLUMN = "observation_type"


def upgrade() -> None:
    """Upgrade from daf_butler universe version 6 to version 7 following update
    of dimensions.yaml in DM-43101.

    Adds ``can_see_sky`` column to the exposure table, and sets its initial
    values based on the ``observation_type`` column.
    """

    ctx = MigrationContext()

    _LOG.info("Checking that this is an unmodified daf_butler universe 6 repo")
    ctx.attributes.validate_dimensions_json(6)

    _LOG.info("Adding can_see_sky column to exposure table")
    # Nullable because existing rows have no value yet; known observation
    # types are backfilled immediately below.
    op.add_column(
        _TABLE, sqlalchemy.Column(_NEW_COLUMN, sqlalchemy.Boolean, nullable=True), schema=ctx.schema
    )

    # Set values for existing data based on the exposure's observation_type,
    # which is closely correlated with whether the sky is visible in the
    # exposure.
    #
    # Any exposures with observation types not present in the two calls to
    # _populate_values below will be left as NULL.
    _LOG.info("Populating can_see_sky column")
    table = ctx.get_table(_TABLE)
    _populate_values(
        table,
        True,
        [
            "science",
            "object",
            "standard",
            "sky flat",
            "standard_star",
            "skyflat",
            "focus",
            "focusing",
            "exp",
            "skyexp",
        ],
    )
    _populate_values(table, False, ["dark", "bias", "agexp", "domeflat", "dome flat", "zero", "spot"])

    unhandled_observation_types = _find_unhandled_observation_types(ctx)
    if unhandled_observation_types:
        # Logged at warning level (rather than info with a "WARNING:" prefix)
        # so it is surfaced by standard logging configuration; lazy %-args
        # avoid formatting when the message is filtered out.
        _LOG.warning(
            "No default value for can_see_sky is known for the following observation types:\n"
            "%s\n"
            "Exposure records with these observation types will have a NULL can_see_sky.",
            unhandled_observation_types,
        )
    else:
        _LOG.info("...can_see_sky values were set for all exposure records.")

    _LOG.info("Updating dimensions.json in ButlerAttributes")
    ctx.attributes.replace_dimensions_json(7)


def downgrade() -> None:
    """Revert the schema from daf_butler universe 7 back to universe 6."""
    context = MigrationContext()

    # Refuse to touch anything that is not exactly an unmodified universe 7.
    _LOG.info("Checking that this is an unmodified daf_butler universe 7 repo")
    context.attributes.validate_dimensions_json(7)

    # Reversing the upgrade simply discards the new column and its data.
    _LOG.info("dropping can_see_sky column")
    op.drop_column(_TABLE, _NEW_COLUMN, schema=context.schema)

    _LOG.info("Updating dimensions.json in ButlerAttributes")
    context.attributes.replace_dimensions_json(6)


def _populate_values(table: sqlalchemy.Table, can_see_sky: bool, observation_types: list[str]) -> None:
    """Assign ``can_see_sky`` for exposure rows with a matching observation
    type.

    Parameters
    ----------
    table : `sqlalchemy.Table`
        The exposure table.
    can_see_sky : `bool`
        Value to store in the ``can_see_sky`` column.
    observation_types : `list` [ `str` ]
        Observation types whose rows should receive this value.
    """
    observation_type = table.columns[_OBSERVATION_TYPE_COLUMN]
    target_column = table.columns[_NEW_COLUMN]
    # Only touch rows that have not already been assigned a value.
    statement = (
        table.update()
        .where(observation_type.in_(observation_types))
        .where(target_column.is_(None))
        .values({_NEW_COLUMN: can_see_sky})
    )
    op.execute(statement)


def _find_unhandled_observation_types(ctx: MigrationContext) -> list[str]:
    """Return the distinct observation types present in the exposure table
    that still have a NULL ``can_see_sky`` value in at least one of their
    rows.

    Parameters
    ----------
    ctx : `MigrationContext`
        Context for the database being migrated.

    Returns
    -------
    observation_types : `list` [ `str` ]
        Observation types with at least one unpopulated row.
    """
    exposure = ctx.get_table(_TABLE)
    query = (
        sqlalchemy.select(exposure.columns[_OBSERVATION_TYPE_COLUMN])
        .distinct()
        .where(exposure.columns[_NEW_COLUMN].is_(None))
    )
    result = ctx.bind.execute(query)
    return list(result.scalars().all())
41 changes: 30 additions & 11 deletions python/lsst/daf/butler_migrate/migration_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,48 @@

from __future__ import annotations

__all__ = ("MigrationContext",)


import alembic
import sqlalchemy as sa
import sqlalchemy

from .butler_attributes import ButlerAttributes

__all__ = ("MigrationContext",)


class MigrationContext:
    """Provides access to commonly-needed objects derived from the alembic
    migration context.
    """

    def __init__(self) -> None:
        # NOTE: the rendered diff interleaved the pre-change assignments with
        # their replacements; this is the reconciled post-change version with
        # each attribute assigned exactly once.
        self.mig_context = (
            alembic.context.get_context()
        )  #: Alembic migration context for the DB being migrated.
        self.schema = (
            self.mig_context.version_table_schema
        )  #: Database schema name for the repository being migrated.
        bind = self.mig_context.bind
        # Data migrations need a live connection; offline (SQL-emitting) mode
        # has no bind, so fail loudly rather than misbehave later.
        assert bind is not None, "Can't run offline -- need access to database to migrate data."
        self.bind = bind  #: A SQLAlchemy connection for the database being migrated.
        self.dialect = self.bind.dialect.name  #: SQLAlchemy dialect for the database being migrated.
        self.is_sqlite = self.dialect == "sqlite"  #: True if the database being migrated is SQLite.
        self.metadata = sqlalchemy.schema.MetaData(
            schema=self.schema
        )  #: SQLAlchemy MetaData object for the DB being migrated.
        self.attributes = ButlerAttributes(self.bind, self.schema)

    def get_table(self, table_name: str) -> sqlalchemy.Table:
        """Create a SQLAlchemy table object for the current database.

        Parameters
        ----------
        table_name : `str`
            Name of the table.

        Returns
        -------
        table : ``sqlalchemy.Table``
            Table object.
        """
        # Reflect the table definition directly from the live database so the
        # migration sees the schema as it currently exists.
        return sqlalchemy.schema.Table(table_name, self.metadata, autoload_with=self.bind, schema=self.schema)

0 comments on commit 9e38d70

Please sign in to comment.