DM-40265: Add migration script for dimensions-config (v3 to v4) #28

Merged: 7 commits, Dec 5, 2023
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11"]
python-version: ["3.11", "3.12"]

steps:
- uses: actions/checkout@v3
@@ -29,7 +29,7 @@ jobs:
- name: Update pip/wheel infrastructure
shell: bash -l {0}
run: |
conda install -y -q "pip<22" wheel
conda install -y -q pip wheel

- name: Install dependencies
shell: bash -l {0}
2 changes: 1 addition & 1 deletion .github/workflows/build_docs.yaml
@@ -18,7 +18,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v3
with:
- python-version: '3.10'
+ python-version: '3.11'
cache: "pip"
cache-dependency-path: "setup.cfg"

4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
- rev: 23.10.1
+ rev: 23.11.0
hooks:
- id: black
# It is recommended to specify the latest version of Python
@@ -21,6 +21,6 @@ repos:
name: isort (python)
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
- rev: v0.1.3
+ rev: v0.1.7
hooks:
- id: ruff
9 changes: 9 additions & 0 deletions doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst
@@ -53,3 +53,12 @@ daf_butler 2 to 3
Migration script: `c5ae3a2cd7c2.py <https://github.com/lsst-dm/daf_butler_migrate/blob/main/migrations/dimensions-config/c5ae3a2cd7c2.py>`_

Changes the size of the ``observation_reason`` column in visit and exposure tables from 32 characters to 68.


daf_butler 3 to 4
=================

Migration script: `9888256c6a18.py <https://github.com/lsst-dm/daf_butler_migrate/blob/main/migrations/dimensions-config/9888256c6a18.py>`_

Does not change the database schema; only the contents of ``config:dimensions.json`` are updated.
Three dimension elements (``visit_detector_region``, ``visit_definition``, and ``visit_system_membership``) gain a ``populated_by: visit`` entry.
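
For illustration, a minimal sketch of the shape of the change to one affected element in ``config:dimensions.json``; all other keys are elided here, and only the ``populated_by`` entry and the ``version`` value differ between v3 and v4:

    # Illustrative sketch only; the real configuration carries many more keys.
    config_v3 = {
        "version": 3,
        "elements": {
            "visit_definition": {
                # ... existing element settings ...
            },
        },
    }

    config_v4 = {
        "version": 4,
        "elements": {
            "visit_definition": {
                # ... existing element settings ...
                "populated_by": "visit",  # added by the v3 -> v4 migration
            },
        },
    }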
17 changes: 2 additions & 15 deletions migrations/_alembic/env.py
@@ -1,5 +1,5 @@
from alembic import context
- from sqlalchemy import engine, engine_from_config, pool
+ from sqlalchemy import engine_from_config, pool

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
@@ -50,22 +50,9 @@ def run_migrations_online() -> None:
and associate a connection with the context.

"""
- # executemany_* arguments are postgres-specific, need to check dialect
- url_str = config.get_main_option("sqlalchemy.url")
- assert url_str is not None, "Expect URL connection defined."
- url = engine.url.make_url(url_str)
- if url.get_dialect().name == "postgresql":
-     kwargs = dict(
-         executemany_mode="values",
-         executemany_values_page_size=10000,
-         executemany_batch_page_size=500,
-     )
- else:
-     kwargs = {}

config_dict = config.get_section(config.config_ini_section)
assert config_dict is not None, "Expect non-empty configuration"
- connectable = engine_from_config(config_dict, prefix="sqlalchemy.", poolclass=pool.NullPool, **kwargs)
+ connectable = engine_from_config(config_dict, prefix="sqlalchemy.", poolclass=pool.NullPool)

schema = config.get_section_option("daf_butler_migrate", "schema")
with connectable.connect() as connection:
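
As background for the simplified env.py, a minimal sketch of how engine_from_config builds an Engine from an alembic-style configuration section; the URL and section contents below are placeholders for illustration only:

    from sqlalchemy import engine_from_config, pool

    # Placeholder section; in env.py this comes from the alembic .ini configuration.
    config_dict = {"sqlalchemy.url": "sqlite:///demo.sqlite3"}

    # Keys with the "sqlalchemy." prefix are stripped and forwarded to create_engine();
    # NullPool avoids holding connections open between migration steps.
    connectable = engine_from_config(config_dict, prefix="sqlalchemy.", poolclass=pool.NullPool)

    with connectable.connect() as connection:
        pass  # Alembic would configure its migration context with this connection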
@@ -151,7 +151,7 @@ def upgrade() -> None:

# drop mapping table
_LOG.debug("Dropping mapping table")
- op.drop_table(ID_MAP_TABLE_NAME, schema)
+ op.drop_table(ID_MAP_TABLE_NAME, schema=schema)

# refresh schema from database
metadata = sa.schema.MetaData(schema=schema)
12 changes: 6 additions & 6 deletions migrations/datasets/4e2d7a28475b.py
@@ -122,8 +122,8 @@ def _migrate_default(
# There may be too many records in the dataset table to fit everything in
# memory, so split the whole thing on dataset_type_id.
query = sa.select(table.columns["dataset_type_id"]).select_from(table).distinct()
- result = bind.execute(query).scalars()
- dataset_type_ids = sorted(result)
+ scalars = bind.execute(query).scalars()
+ dataset_type_ids = sorted(scalars)
_LOG.info("Found %s dataset types in dataset table", len(dataset_type_ids))

for dataset_type_id in dataset_type_ids:
@@ -140,8 +140,8 @@ def _migrate_default(
iterator = iter(rows)
count = 0
while chunk := list(itertools.islice(iterator, 1000)):
- query = tmp_table.insert().values(chunk)
- result = bind.execute(query)
+ insert = tmp_table.insert().values(chunk)
+ result = bind.execute(insert)
count += result.rowcount
_LOG.info("Inserted %s rows into temporary table", count)

@@ -156,12 +156,12 @@ def _migrate_default(
)

# Update ingest date from a temporary table.
- query = table.update().values(
+ update = table.update().values(
ingest_date=sa.select(tmp_table.columns["ingest_date"])
.where(tmp_table.columns["id"] == table.columns["id"])
.scalar_subquery()
)
- result = bind.execute(query)
+ result = bind.execute(update)
_LOG.info("Updated %s rows in dataset table", result.rowcount)

# Update manager schema version.
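
The pattern above (staging rows into a temporary table in chunks, then copying them back with a single correlated scalar-subquery UPDATE) can be sketched self-contained; the tables, columns, and chunk size below are invented for illustration:

    import itertools

    import sqlalchemy as sa

    # Invented tables; the real migration works on the dataset table and a
    # temporary ingest-date mapping table.
    metadata = sa.MetaData()
    main = sa.Table(
        "main", metadata,
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("value", sa.String(32)),
    )
    tmp = sa.Table(
        "tmp", metadata,
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("value", sa.String(32)),
    )

    engine = sa.create_engine("sqlite://")
    metadata.create_all(engine)

    rows = [{"id": i, "value": f"v{i}"} for i in range(1000)]
    with engine.begin() as bind:
        # Rows in `main` start without a value.
        bind.execute(main.insert(), [{"id": r["id"]} for r in rows])

        # Stage the new values in chunks (the migration script uses 1000 per chunk).
        iterator = iter(rows)
        while chunk := list(itertools.islice(iterator, 400)):
            bind.execute(tmp.insert().values(chunk))

        # Correlated scalar subquery: for each row of `main`, pull the matching
        # value from `tmp` in one UPDATE statement.
        update = main.update().values(
            value=sa.select(tmp.c.value).where(tmp.c.id == main.c.id).scalar_subquery()
        )
        result = bind.execute(update)
        print(result.rowcount)  # -> 1000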
68 changes: 68 additions & 0 deletions migrations/dimensions-config/9888256c6a18.py
@@ -0,0 +1,68 @@
"""Migration script for dimensions.yaml namespace=daf_butler version=4.

Revision ID: 9888256c6a18
Revises: c5ae3a2cd7c2
Create Date: 2023-12-04 18:16:25.375102

"""
from alembic import context
from lsst.daf.butler_migrate.butler_attributes import ButlerAttributes

# revision identifiers, used by Alembic.
revision = "9888256c6a18"
down_revision = "c5ae3a2cd7c2"
branch_labels = None
depends_on = None


def upgrade() -> None:
"""Upgrade from version 3 to version 4 following update of dimension.yaml
in DM-34589.

Summary of changes:

- Database schema did not change, only contents of
`config:dimensions.json` in `butler_attributes`.
- Changes in `config:dimensions.json`:
- "version" value updated to 4.
- "populated_by: visit" added to three elements:
`visit_detector_region`, `visit_definition` and
`visit_system_membership`.
"""
_migrate(3, 4, True)


def downgrade() -> None:
"""Undo changes made in upgrade()."""
_migrate(4, 3, False)


def _migrate(old_version: int, new_version: int, upgrade: bool) -> None:
"""Do migration in either direction."""

def _update_config(config: dict) -> dict:
"""Update dimension.json configuration"""
assert config["version"] == old_version, f"dimensions.json version mismatch: {config['version']}"

config["version"] = new_version

elements = config["elements"]
for element_name in ("visit_detector_region", "visit_definition", "visit_system_membership"):
element = elements[element_name]
if upgrade:
element["populated_by"] = "visit"
else:
del element["populated_by"]

return config

mig_context = context.get_context()

# When schemas are used in Postgres, all tables belong to the same schema,
# so we can use alembic's version_table_schema to see where everything goes.
schema = mig_context.version_table_schema

# Update attributes
assert mig_context.bind is not None
attributes = ButlerAttributes(mig_context.bind, schema)
attributes.update_dimensions_json(_update_config)
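
A minimal round-trip sketch of the transform applied by _update_config, using a pared-down configuration dict (the real config:dimensions.json carries many more keys per element):

    # Pared-down stand-in for config:dimensions.json; illustration only.
    ELEMENTS = ("visit_detector_region", "visit_definition", "visit_system_membership")


    def transform(config: dict, old: int, new: int, upgrade: bool) -> dict:
        """Apply the same version bump and populated_by edit as the migration."""
        assert config["version"] == old, f"dimensions.json version mismatch: {config['version']}"
        config["version"] = new
        for name in ELEMENTS:
            if upgrade:
                config["elements"][name]["populated_by"] = "visit"
            else:
                del config["elements"][name]["populated_by"]
        return config


    config = {"version": 3, "elements": {name: {} for name in ELEMENTS}}
    config = transform(config, 3, 4, upgrade=True)
    assert config["elements"]["visit_definition"]["populated_by"] == "visit"
    config = transform(config, 4, 3, upgrade=False)
    assert "populated_by" not in config["elements"]["visit_definition"]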
8 changes: 6 additions & 2 deletions migrations/obscore-config/4fe28ef5030f.py
@@ -7,6 +7,7 @@
"""

import json
from typing import TYPE_CHECKING

import yaml
from alembic import context, op
@@ -15,6 +16,9 @@
from lsst.daf.butler_migrate.registry import make_registry
from lsst.utils import doImportType

if TYPE_CHECKING:
from lsst.daf.butler.registry.obscore import ObsCoreLiveTableManager

# revision identifiers, used by Alembic.
revision = "4fe28ef5030f"
down_revision = "2daeabfb5019"
@@ -142,7 +146,7 @@ def _make_obscore_table(obscore_config: dict) -> None:
manager_class_name = attributes.get("config:registry.managers.obscore")
if manager_class_name is None:
raise ValueError("Registry obscore manager has to be configured in butler_attributes")
- manager_class = doImportType(manager_class_name)
+ manager_class: type[ObsCoreLiveTableManager] = doImportType(manager_class_name)

repository = context.config.get_section_option("daf_butler_migrate", "repository")
assert repository is not None, "Need repository in configuration"
@@ -154,7 +158,7 @@ def _make_obscore_table(obscore_config: dict) -> None:
database = registry._db
managers = registry._managers
with database.declareStaticTables(create=False) as staticTablesContext:
- manager = manager_class.initialize(
+ manager: ObsCoreLiveTableManager = manager_class.initialize(  # type: ignore[assignment]
database,
staticTablesContext,
universe=registry.dimensions,
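
The TYPE_CHECKING guard keeps the obscore manager import out of the runtime path while still giving the type checker a concrete class for the dynamically imported manager. A generic, runnable sketch of that pattern, with decimal.Decimal standing in for the dynamically loaded class:

    from __future__ import annotations

    import importlib
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Imported for annotations only; never executed at runtime.
        from decimal import Decimal


    def make_instance(class_name: str) -> Decimal:
        """Dynamically import a class and instantiate it.

        Annotating the result narrows the bare `type` returned by the dynamic
        import, which is what the `type[ObsCoreLiveTableManager]` annotation
        does in the change above.
        """
        module_name, _, attr = class_name.rpartition(".")
        cls: type[Decimal] = getattr(importlib.import_module(module_name), attr)
        return cls("1.5")


    print(make_instance("decimal.Decimal"))  # -> 1.5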
7 changes: 4 additions & 3 deletions pyproject.toml
@@ -15,7 +15,8 @@ classifiers = [
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Astronomy",
]
keywords = ["lsst"]
@@ -55,7 +56,7 @@ version = { attr = "lsst_versions.get_lsst_version" }

[tool.black]
line-length = 110
target-version = ["py310"]
target-version = ["py311"]

[tool.isort]
profile = "black"
@@ -96,7 +97,7 @@ select = [
"UP",
"C4",
]
target-version = "py310"
target-version = "py311"
extend-select = [
"RUF100", # Warn about unused noqa
]
13 changes: 8 additions & 5 deletions tests/test_dimensions_json.py
@@ -22,7 +22,9 @@
import os
import unittest

- from lsst.daf.butler import Butler, Config, Registry
+ from lsst.daf.butler import Butler, Config
+ from lsst.daf.butler.direct_butler import DirectButler
+ from lsst.daf.butler.registry.sql_registry import SqlRegistry
from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
from lsst.daf.butler.transfers import YamlRepoImportBackend
from lsst.daf.butler_migrate import database, migrate, script
@@ -63,7 +65,7 @@ def make_butler_v0(self) -> Config:
self.db = database.Database.from_repo(self.root)
return config

- def load_data(self, registry: Registry, filename: str) -> None:
+ def load_data(self, registry: SqlRegistry, filename: str) -> None:
"""Load registry test data from filename in data folder."""
with open(os.path.join(TESTDIR, "data", filename)) as stream:
backend = YamlRepoImportBackend(stream, registry)
@@ -121,8 +123,9 @@ def test_upgrade_v2(self) -> None:
versions = self.db.manager_versions(_NAMESPACE)
self.assertEqual(versions["dimensions-config"], (_NAMESPACE, "0", _REVISION_V0))

- butler = Butler(config, writeable=True)
- self.load_data(butler.registry, "records.yaml")
+ butler = Butler.from_config(config, writeable=True)
+ assert isinstance(butler, DirectButler), "Only DirectButler is supported"
+ self.load_data(butler._registry, "records.yaml")

# Check records for v0 attributes.
records = list(butler.registry.queryDimensionRecords("visit"))
@@ -164,7 +167,7 @@ def test_upgrade_v2(self) -> None:
versions = self.db.manager_versions()
self.assertEqual(versions["dimensions-config"], (_NAMESPACE, "2", _REVISION_V2))

- butler = Butler(config, writeable=False)
+ butler = Butler.from_config(config, writeable=False)

# Check records for v2 attributes.
records = list(butler.registry.queryDimensionRecords("instrument"))
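
A condensed sketch of the updated test setup, assembled from the lines changed above; it assumes an existing repository configuration (the `config` object built earlier in the test) and daf_butler's newer Butler.from_config entry point, so it is a usage sketch rather than a standalone script:

    from lsst.daf.butler import Butler
    from lsst.daf.butler.direct_butler import DirectButler

    # `config` would be the repository configuration created earlier in the test.
    butler = Butler.from_config(config, writeable=True)
    # YamlRepoImportBackend needs the underlying SqlRegistry, which only
    # DirectButler exposes (as the private `_registry` attribute).
    assert isinstance(butler, DirectButler), "Only DirectButler is supported"
    registry = butler._registry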