Skip to content

Commit

Permalink
Merge pull request #28 from lsst-dm/tickets/DM-40265
Browse files Browse the repository at this point in the history
DM-40265: Add migration script for dimensions-config (v3 to v4)
  • Loading branch information
andy-slac committed Dec 5, 2023
2 parents 83a83a8 + 85bd7d5 commit 59d4c9b
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 37 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11"]
python-version: ["3.11", "3.12"]

steps:
- uses: actions/checkout@v3
Expand All @@ -29,7 +29,7 @@ jobs:
- name: Update pip/wheel infrastructure
shell: bash -l {0}
run: |
conda install -y -q "pip<22" wheel
conda install -y -q pip wheel
- name: Install dependencies
shell: bash -l {0}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.10'
python-version: '3.11'
cache: "pip"
cache-dependency-path: "setup.cfg"

Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 23.10.1
rev: 23.11.0
hooks:
- id: black
# It is recommended to specify the latest version of Python
Expand All @@ -21,6 +21,6 @@ repos:
name: isort (python)
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.1.3
rev: v0.1.7
hooks:
- id: ruff
9 changes: 9 additions & 0 deletions doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,12 @@ daf_butler 2 to 3
Migration script: `c5ae3a2cd7c2.py <https://github.com/lsst-dm/daf_butler_migrate/blob/main/migrations/dimensions-config/c5ae3a2cd7c2.py>`_

Changes the size of the ``observation_reason`` column in visit and exposure tables from 32 characters to 68.


daf_butler 3 to 4
=================

Migration script: `9888256c6a18.py <https://github.com/lsst-dm/daf_butler_migrate/blob/main/migrations/dimensions-config/9888256c6a18.py>`_

Does not change the schema, only updates the contents of ``config:dimensions.json``.
Three elements in the dimensions configuration — ``visit_detector_region``, ``visit_definition`` and ``visit_system_membership`` — gain a ``populated_by: visit`` option.
17 changes: 2 additions & 15 deletions migrations/_alembic/env.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from alembic import context
from sqlalchemy import engine, engine_from_config, pool
from sqlalchemy import engine_from_config, pool

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
Expand Down Expand Up @@ -50,22 +50,9 @@ def run_migrations_online() -> None:
and associate a connection with the context.
"""
# executemany_* arguments are postgres-specific, need to check dialect
url_str = config.get_main_option("sqlalchemy.url")
assert url_str is not None, "Expect URL connection defined."
url = engine.url.make_url(url_str)
if url.get_dialect().name == "postgresql":
kwargs = dict(
executemany_mode="values",
executemany_values_page_size=10000,
executemany_batch_page_size=500,
)
else:
kwargs = {}

config_dict = config.get_section(config.config_ini_section)
assert config_dict is not None, "Expect non-empty configuration"
connectable = engine_from_config(config_dict, prefix="sqlalchemy.", poolclass=pool.NullPool, **kwargs)
connectable = engine_from_config(config_dict, prefix="sqlalchemy.", poolclass=pool.NullPool)

schema = config.get_section_option("daf_butler_migrate", "schema")
with connectable.connect() as connection:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def upgrade() -> None:

# drop mapping table
_LOG.debug("Dropping mapping table")
op.drop_table(ID_MAP_TABLE_NAME, schema)
op.drop_table(ID_MAP_TABLE_NAME, schema=schema)

# refresh schema from database
metadata = sa.schema.MetaData(schema=schema)
Expand Down
12 changes: 6 additions & 6 deletions migrations/datasets/4e2d7a28475b.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def _migrate_default(
# There may be very many records in dataset table to fit everything in
# memory, so split the whole thing on dataset_type_id.
query = sa.select(table.columns["dataset_type_id"]).select_from(table).distinct()
result = bind.execute(query).scalars()
dataset_type_ids = sorted(result)
scalars = bind.execute(query).scalars()
dataset_type_ids = sorted(scalars)
_LOG.info("Found %s dataset types in dataset table", len(dataset_type_ids))

for dataset_type_id in dataset_type_ids:
Expand All @@ -140,8 +140,8 @@ def _migrate_default(
iterator = iter(rows)
count = 0
while chunk := list(itertools.islice(iterator, 1000)):
query = tmp_table.insert().values(chunk)
result = bind.execute(query)
insert = tmp_table.insert().values(chunk)
result = bind.execute(insert)
count += result.rowcount
_LOG.info("Inserted %s rows into temporary table", count)

Expand All @@ -156,12 +156,12 @@ def _migrate_default(
)

# Update ingest date from a temporary table.
query = table.update().values(
update = table.update().values(
ingest_date=sa.select(tmp_table.columns["ingest_date"])
.where(tmp_table.columns["id"] == table.columns["id"])
.scalar_subquery()
)
result = bind.execute(query)
result = bind.execute(update)
_LOG.info("Updated %s rows in dataset table", result.rowcount)

# Update manager schema version.
Expand Down
68 changes: 68 additions & 0 deletions migrations/dimensions-config/9888256c6a18.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""Migration script for dimensions.yaml namespace=daf_butler version=4.
Revision ID: 9888256c6a18
Revises: c5ae3a2cd7c2
Create Date: 2023-12-04 18:16:25.375102
"""
from alembic import context
from lsst.daf.butler_migrate.butler_attributes import ButlerAttributes

# revision identifiers, used by Alembic.
revision = "9888256c6a18"  # this migration's revision ID
down_revision = "c5ae3a2cd7c2"  # parent revision (the v3 dimensions-config migration)
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Upgrade dimensions-config from version 3 to version 4 (DM-34589).

    The database schema itself does not change; only the stored
    ``config:dimensions.json`` blob in ``butler_attributes`` is rewritten:

    - the ``version`` value becomes 4;
    - a ``populated_by: visit`` entry is added to the
      ``visit_detector_region``, ``visit_definition`` and
      ``visit_system_membership`` elements.
    """
    old, new = 3, 4
    _migrate(old, new, True)


def downgrade() -> None:
    """Revert dimensions-config from version 4 back to version 3."""
    old, new = 4, 3
    _migrate(old, new, False)


def _migrate(old_version: int, new_version: int, do_upgrade: bool) -> None:
    """Migrate ``config:dimensions.json`` between versions 3 and 4.

    Parameters
    ----------
    old_version : `int`
        Version expected in the stored configuration before migration.
    new_version : `int`
        Version written to the configuration after migration.
    do_upgrade : `bool`
        If `True`, add ``populated_by: visit`` to the affected elements;
        if `False`, remove it (downgrade).  Renamed from ``upgrade`` to
        avoid shadowing the module-level ``upgrade()`` function.
    """

    def _update_config(config: dict) -> dict:
        """Return the updated dimensions.json configuration."""
        # Fail fast if the stored config is not at the version we expect.
        assert config["version"] == old_version, (
            f"dimensions.json version mismatch: expected {old_version}, found {config['version']}"
        )

        config["version"] = new_version

        elements = config["elements"]
        for element_name in ("visit_detector_region", "visit_definition", "visit_system_membership"):
            element = elements[element_name]
            if do_upgrade:
                element["populated_by"] = "visit"
            else:
                # Downgrade: the key must exist in a valid v4 config, so a
                # KeyError here indicates a corrupted configuration.
                del element["populated_by"]

        return config

    mig_context = context.get_context()

    # When we use schemas in postgres then all tables belong to the same
    # schema, so we can use alembic's version_table_schema to see where
    # everything goes.
    schema = mig_context.version_table_schema

    # Rewrite the stored dimensions configuration in butler_attributes.
    assert mig_context.bind is not None, "Migration requires an active database connection"
    attributes = ButlerAttributes(mig_context.bind, schema)
    attributes.update_dimensions_json(_update_config)
8 changes: 6 additions & 2 deletions migrations/obscore-config/4fe28ef5030f.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import json
from typing import TYPE_CHECKING

import yaml
from alembic import context, op
Expand All @@ -15,6 +16,9 @@
from lsst.daf.butler_migrate.registry import make_registry
from lsst.utils import doImportType

if TYPE_CHECKING:
from lsst.daf.butler.registry.obscore import ObsCoreLiveTableManager

# revision identifiers, used by Alembic.
revision = "4fe28ef5030f"
down_revision = "2daeabfb5019"
Expand Down Expand Up @@ -142,7 +146,7 @@ def _make_obscore_table(obscore_config: dict) -> None:
manager_class_name = attributes.get("config:registry.managers.obscore")
if manager_class_name is None:
raise ValueError("Registry obscore manager has to be configured in butler_attributes")
manager_class = doImportType(manager_class_name)
manager_class: type[ObsCoreLiveTableManager] = doImportType(manager_class_name)

repository = context.config.get_section_option("daf_butler_migrate", "repository")
assert repository is not None, "Need repository in configuration"
Expand All @@ -154,7 +158,7 @@ def _make_obscore_table(obscore_config: dict) -> None:
database = registry._db
managers = registry._managers
with database.declareStaticTables(create=False) as staticTablesContext:
manager = manager_class.initialize(
manager: ObsCoreLiveTableManager = manager_class.initialize( # type: ignore[assignment]
database,
staticTablesContext,
universe=registry.dimensions,
Expand Down
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ classifiers = [
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Astronomy",
]
keywords = ["lsst"]
Expand Down Expand Up @@ -55,7 +56,7 @@ version = { attr = "lsst_versions.get_lsst_version" }

[tool.black]
line-length = 110
target-version = ["py310"]
target-version = ["py311"]

[tool.isort]
profile = "black"
Expand Down Expand Up @@ -96,7 +97,7 @@ select = [
"UP",
"C4",
]
target-version = "py310"
target-version = "py311"
extend-select = [
"RUF100", # Warn about unused noqa
]
Expand Down
13 changes: 8 additions & 5 deletions tests/test_dimensions_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import os
import unittest

from lsst.daf.butler import Butler, Config, Registry
from lsst.daf.butler import Butler, Config
from lsst.daf.butler.direct_butler import DirectButler
from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
from lsst.daf.butler.transfers import YamlRepoImportBackend
from lsst.daf.butler_migrate import database, migrate, script
Expand Down Expand Up @@ -63,7 +65,7 @@ def make_butler_v0(self) -> Config:
self.db = database.Database.from_repo(self.root)
return config

def load_data(self, registry: Registry, filename: str) -> None:
def load_data(self, registry: SqlRegistry, filename: str) -> None:
"""Load registry test data from filename in data folder."""
with open(os.path.join(TESTDIR, "data", filename)) as stream:
backend = YamlRepoImportBackend(stream, registry)
Expand Down Expand Up @@ -121,8 +123,9 @@ def test_upgrade_v2(self) -> None:
versions = self.db.manager_versions(_NAMESPACE)
self.assertEqual(versions["dimensions-config"], (_NAMESPACE, "0", _REVISION_V0))

butler = Butler(config, writeable=True)
self.load_data(butler.registry, "records.yaml")
butler = Butler.from_config(config, writeable=True)
assert isinstance(butler, DirectButler), "Only DirectButler is supported"
self.load_data(butler._registry, "records.yaml")

# Check records for v0 attributes.
records = list(butler.registry.queryDimensionRecords("visit"))
Expand Down Expand Up @@ -164,7 +167,7 @@ def test_upgrade_v2(self) -> None:
versions = self.db.manager_versions()
self.assertEqual(versions["dimensions-config"], (_NAMESPACE, "2", _REVISION_V2))

butler = Butler(config, writeable=False)
butler = Butler.from_config(config, writeable=False)

# Check records for v2 attributes.
records = list(butler.registry.queryDimensionRecords("instrument"))
Expand Down

0 comments on commit 59d4c9b

Please sign in to comment.