diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f131fbd..4de5c3c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.11", "3.12"] steps: - uses: actions/checkout@v3 @@ -29,7 +29,7 @@ jobs: - name: Update pip/wheel infrastructure shell: bash -l {0} run: | - conda install -y -q "pip<22" wheel + conda install -y -q pip wheel - name: Install dependencies shell: bash -l {0} diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml index 218e467..8122872 100644 --- a/.github/workflows/build_docs.yaml +++ b/.github/workflows/build_docs.yaml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: '3.11' cache: "pip" cache-dependency-path: "setup.cfg" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b793031..12182ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/psf/black - rev: 23.10.1 + rev: 23.11.0 hooks: - id: black # It is recommended to specify the latest version of Python @@ -21,6 +21,6 @@ repos: name: isort (python) - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.1.3 + rev: v0.1.7 hooks: - id: ruff diff --git a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst index bbe3cfa..9e10363 100644 --- a/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst +++ b/doc/lsst.daf.butler_migrate/migrations/dimensions-config.rst @@ -53,3 +53,12 @@ daf_butler 2 to 3 Migration script: `c5ae3a2cd7c2.py `_ Changes the size of the ``observation_reason`` column in visit and exposure tables from 32 characters to 68. + + +daf_butler 3 to 4 +================= + +Migration script: `9888256c6a18.py `_ + +Does not change the schema, only updates the contents of ``config:dimensions.json``. +Three elements in dimensions configuration add ``populated_by: visit`` option. diff --git a/migrations/_alembic/env.py b/migrations/_alembic/env.py index 8eb8878..2fd2299 100644 --- a/migrations/_alembic/env.py +++ b/migrations/_alembic/env.py @@ -1,5 +1,5 @@ from alembic import context -from sqlalchemy import engine, engine_from_config, pool +from sqlalchemy import engine_from_config, pool # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -50,22 +50,9 @@ def run_migrations_online() -> None: and associate a connection with the context. """ - # executemany_* arguments are postgres-specific, need to check dialect - url_str = config.get_main_option("sqlalchemy.url") - assert url_str is not None, "Expect URL connection defined." - url = engine.url.make_url(url_str) - if url.get_dialect().name == "postgresql": - kwargs = dict( - executemany_mode="values", - executemany_values_page_size=10000, - executemany_batch_page_size=500, - ) - else: - kwargs = {} - config_dict = config.get_section(config.config_ini_section) assert config_dict is not None, "Expect non-empty configuration" - connectable = engine_from_config(config_dict, prefix="sqlalchemy.", poolclass=pool.NullPool, **kwargs) + connectable = engine_from_config(config_dict, prefix="sqlalchemy.", poolclass=pool.NullPool) schema = config.get_section_option("daf_butler_migrate", "schema") with connectable.connect() as connection: diff --git a/migrations/_oneshot/datasets/int_1.0.0_to_uuid_1.0.0/2101fbf51ad3.py b/migrations/_oneshot/datasets/int_1.0.0_to_uuid_1.0.0/2101fbf51ad3.py index 3d70e7e..07fce08 100644 --- a/migrations/_oneshot/datasets/int_1.0.0_to_uuid_1.0.0/2101fbf51ad3.py +++ b/migrations/_oneshot/datasets/int_1.0.0_to_uuid_1.0.0/2101fbf51ad3.py @@ -151,7 +151,7 @@ def upgrade() -> None: # drop mapping table _LOG.debug("Dropping mapping table") - op.drop_table(ID_MAP_TABLE_NAME, schema) + op.drop_table(ID_MAP_TABLE_NAME, schema=schema) # refresh schema from database metadata = sa.schema.MetaData(schema=schema) diff --git a/migrations/datasets/4e2d7a28475b.py b/migrations/datasets/4e2d7a28475b.py index 624567c..ff04df5 100644 --- a/migrations/datasets/4e2d7a28475b.py +++ b/migrations/datasets/4e2d7a28475b.py @@ -122,8 +122,8 @@ def _migrate_default( # There may be very many records in dataset table to fit everything in # memory, so split the whole thing on dataset_type_id. query = sa.select(table.columns["dataset_type_id"]).select_from(table).distinct() - result = bind.execute(query).scalars() - dataset_type_ids = sorted(result) + scalars = bind.execute(query).scalars() + dataset_type_ids = sorted(scalars) _LOG.info("Found %s dataset types in dataset table", len(dataset_type_ids)) for dataset_type_id in dataset_type_ids: @@ -140,8 +140,8 @@ def _migrate_default( iterator = iter(rows) count = 0 while chunk := list(itertools.islice(iterator, 1000)): - query = tmp_table.insert().values(chunk) - result = bind.execute(query) + insert = tmp_table.insert().values(chunk) + result = bind.execute(insert) count += result.rowcount _LOG.info("Inserted %s rows into temporary table", count) @@ -156,12 +156,12 @@ def _migrate_default( ) # Update ingest date from a temporary table. - query = table.update().values( + update = table.update().values( ingest_date=sa.select(tmp_table.columns["ingest_date"]) .where(tmp_table.columns["id"] == table.columns["id"]) .scalar_subquery() ) - result = bind.execute(query) + result = bind.execute(update) _LOG.info("Updated %s rows in dataset table", result.rowcount) # Update manager schema version. diff --git a/migrations/dimensions-config/9888256c6a18.py b/migrations/dimensions-config/9888256c6a18.py new file mode 100644 index 0000000..2d902bb --- /dev/null +++ b/migrations/dimensions-config/9888256c6a18.py @@ -0,0 +1,68 @@ +"""Migration script for dimensions.yaml namespace=daf_butler version=4. + +Revision ID: 9888256c6a18 +Revises: c5ae3a2cd7c2 +Create Date: 2023-12-04 18:16:25.375102 + +""" +from alembic import context +from lsst.daf.butler_migrate.butler_attributes import ButlerAttributes + +# revision identifiers, used by Alembic. +revision = "9888256c6a18" +down_revision = "c5ae3a2cd7c2" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Upgrade from version 3 to version 4 following update of dimension.yaml + in DM-34589. + + Summary of changes: + + - Database schema did not change, only contents of + `config:dimensions.json` in `butler_attributes`. + - Changes in `config:dimensions.json`: + - "version" value updated to 4. + - "populated_by: visit" added to three elements: + `visit_detector_region`, `visit_definition` and + `visit_system_membership`. + """ + _migrate(3, 4, True) + + +def downgrade() -> None: + """Undo changes made in upgrade().""" + _migrate(4, 3, False) + + +def _migrate(old_version: int, new_version: int, upgrade: bool) -> None: + """Do migration in either direction.""" + + def _update_config(config: dict) -> dict: + """Update dimension.json configuration""" + assert config["version"] == old_version, f"dimensions.json version mismatch: {config['version']}" + + config["version"] = new_version + + elements = config["elements"] + for element_name in ("visit_detector_region", "visit_definition", "visit_system_membership"): + element = elements[element_name] + if upgrade: + element["populated_by"] = "visit" + else: + del element["populated_by"] + + return config + + mig_context = context.get_context() + + # When we use schemas in postgres then all tables belong to the same schema + # so we can use alembic's version_table_schema to see where everything goes + schema = mig_context.version_table_schema + + # Update attributes + assert mig_context.bind is not None + attributes = ButlerAttributes(mig_context.bind, schema) + attributes.update_dimensions_json(_update_config) diff --git a/migrations/obscore-config/4fe28ef5030f.py b/migrations/obscore-config/4fe28ef5030f.py index bbb712c..cd6e26c 100644 --- a/migrations/obscore-config/4fe28ef5030f.py +++ b/migrations/obscore-config/4fe28ef5030f.py @@ -7,6 +7,7 @@ """ import json +from typing import TYPE_CHECKING import yaml from alembic import context, op @@ -15,6 +16,9 @@ from lsst.daf.butler_migrate.registry import make_registry from lsst.utils import doImportType +if TYPE_CHECKING: + from lsst.daf.butler.registry.obscore import ObsCoreLiveTableManager + # revision identifiers, used by Alembic. revision = "4fe28ef5030f" down_revision = "2daeabfb5019" @@ -142,7 +146,7 @@ def _make_obscore_table(obscore_config: dict) -> None: manager_class_name = attributes.get("config:registry.managers.obscore") if manager_class_name is None: raise ValueError("Registry obscore manager has to be configured in butler_attributes") - manager_class = doImportType(manager_class_name) + manager_class: type[ObsCoreLiveTableManager] = doImportType(manager_class_name) repository = context.config.get_section_option("daf_butler_migrate", "repository") assert repository is not None, "Need repository in configuration" @@ -154,7 +158,7 @@ def _make_obscore_table(obscore_config: dict) -> None: database = registry._db managers = registry._managers with database.declareStaticTables(create=False) as staticTablesContext: - manager = manager_class.initialize( + manager: ObsCoreLiveTableManager = manager_class.initialize( # type: ignore[assignment] database, staticTablesContext, universe=registry.dimensions, diff --git a/pyproject.toml b/pyproject.toml index 205016e..16be882 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,8 @@ classifiers = [ "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Astronomy", ] keywords = ["lsst"] @@ -55,7 +56,7 @@ version = { attr = "lsst_versions.get_lsst_version" } [tool.black] line-length = 110 -target-version = ["py310"] +target-version = ["py311"] [tool.isort] profile = "black" @@ -96,7 +97,7 @@ select = [ "UP", "C4", ] -target-version = "py310" +target-version = "py311" extend-select = [ "RUF100", # Warn about unused noqa ] diff --git a/tests/test_dimensions_json.py b/tests/test_dimensions_json.py index 4171e56..5dbce4c 100644 --- a/tests/test_dimensions_json.py +++ b/tests/test_dimensions_json.py @@ -22,7 +22,9 @@ import os import unittest -from lsst.daf.butler import Butler, Config, Registry +from lsst.daf.butler import Butler, Config +from lsst.daf.butler.direct_butler import DirectButler +from lsst.daf.butler.registry.sql_registry import SqlRegistry from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir from lsst.daf.butler.transfers import YamlRepoImportBackend from lsst.daf.butler_migrate import database, migrate, script @@ -63,7 +65,7 @@ def make_butler_v0(self) -> Config: self.db = database.Database.from_repo(self.root) return config - def load_data(self, registry: Registry, filename: str) -> None: + def load_data(self, registry: SqlRegistry, filename: str) -> None: """Load registry test data from filename in data folder.""" with open(os.path.join(TESTDIR, "data", filename)) as stream: backend = YamlRepoImportBackend(stream, registry) @@ -121,8 +123,9 @@ def test_upgrade_v2(self) -> None: versions = self.db.manager_versions(_NAMESPACE) self.assertEqual(versions["dimensions-config"], (_NAMESPACE, "0", _REVISION_V0)) - butler = Butler(config, writeable=True) - self.load_data(butler.registry, "records.yaml") + butler = Butler.from_config(config, writeable=True) + assert isinstance(butler, DirectButler), "Only DirectButler is supported" + self.load_data(butler._registry, "records.yaml") # Check records for v0 attributes. records = list(butler.registry.queryDimensionRecords("visit")) @@ -164,7 +167,7 @@ def test_upgrade_v2(self) -> None: versions = self.db.manager_versions() self.assertEqual(versions["dimensions-config"], (_NAMESPACE, "2", _REVISION_V2)) - butler = Butler(config, writeable=False) + butler = Butler.from_config(config, writeable=False) # Check records for v2 attributes. records = list(butler.registry.queryDimensionRecords("instrument"))