Merge pull request #39 from lsst-dm/tickets/DM-44007

DM-44007: Improve support for sqlite registries
lsst-dm · Apr 23, 2024 · 7afde34 · 7afde34
2 parents 9a81b3b + b2b7211
commit 7afde34
Show file tree

Hide file tree

Showing 7 changed files with 270 additions and 504 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,12 @@
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: check-yaml
       - id: end-of-file-fixer
       - id: trailing-whitespace
   - repo: https://github.com/psf/black
-    rev: 24.2.0
+    rev: 24.4.0
     hooks:
       - id: black
         # It is recommended to specify the latest version of Python
@@ -21,6 +21,6 @@ repos:
         name: isort (python)
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.3.0
+    rev: v0.4.1
     hooks:
       - id: ruff
diff --git a/migrations/dimensions-config/2a8a32e1bec3.py b/migrations/dimensions-config/2a8a32e1bec3.py
@@ -5,6 +5,7 @@
 Create Date: 2024-02-20 14:49:26.435042
 
 """
+
 import logging
 
 import sqlalchemy
@@ -92,7 +93,14 @@ def _update_config(config: dict) -> dict:
     # Actual schema change.
     for table, column in table_columns:
         _LOG.info("Alter %s.%s column type to %s", table, column, new_type)
-        op.alter_column(table, column, type_=new_type, schema=schema)
+        with op.batch_alter_table(table, schema=schema) as batch_op:
+            batch_op.alter_column(column, type_=new_type)
+            if op.get_bind().dialect.name == "sqlite" and table == "instrument":
+                # SQLite enforces the length limit with a check constraint; re-create it.
+                constraint_name = "instrument_len_name"
+                batch_op.drop_constraint(constraint_name)
+                constraint = f'length("{column}")<={size} AND length("{column}")>=1'
+                batch_op.create_check_constraint(constraint_name, sqlalchemy.text(constraint))
 
     # Update attributes
     assert mig_context.bind is not None
@@ -141,6 +149,10 @@ def _lock_tables(tables: list[str], schema: str) -> None:
     """Lock all tables that need to be migrated to avoid conflicts."""
 
     connection = op.get_bind()
+    if connection.dialect.name == "sqlite":
+        # SQLite does not support LOCK TABLE.
+        return
+
     for table in tables:
         # We do not need quoting for schema/table names.
         if schema:

diff --git a/migrations/dimensions-config/c5ae3a2cd7c2.py b/migrations/dimensions-config/c5ae3a2cd7c2.py
@@ -5,6 +5,7 @@
 Create Date: 2022-11-25 12:04:18.424257
 
 """
+
 import sqlalchemy as sa
 from alembic import context, op
 from lsst.daf.butler_migrate.butler_attributes import ButlerAttributes
@@ -26,15 +27,15 @@ def upgrade() -> None:
         - Change observation_reason column size for visit and exposure tables.
         - For sqlite backend update check constraint for new column size.
     """
-    _migrate(2, 3, 68)
+    _migrate(2, 3, 68, 32)
 
 
 def downgrade() -> None:
     """Undo migration."""
-    _migrate(3, 2, 32)
+    _migrate(3, 2, 32, 68)
 
 
-def _migrate(old_version: int, new_version: int, column_size: int) -> None:
+def _migrate(old_version: int, new_version: int, column_size: int, old_column_size: int) -> None:
     mig_context = context.get_context()
 
     # When we use schemas in postgres then all tables belong to the same schema
@@ -67,13 +68,22 @@ def _update_config(config: dict) -> dict:
         with op.batch_alter_table(table_name, schema=schema) as batch_op:
             # change column type
             column = "observation_reason"
-            column_type = sa.String(column_size)
+            column_type: sa.types.TypeEngine
+            if column_size > 32:
+                # daf_butler uses Text for all string columns longer than 32
+                # characters.
+                column_type = sa.Text()
+            else:
+                column_type = sa.String(column_size)
             batch_op.alter_column(column, type_=column_type)  # type: ignore[attr-defined]
 
             assert mig_context.bind is not None, "Requires an existing bind"
             if mig_context.bind.dialect.name == "sqlite":
                 # For sqlite we also define check constraint
                 constraint_name = f"{table_name}_len_{column}"
                 constraint = f'length("{column}")<={column_size} AND length("{column}")>=1'
-                batch_op.drop_constraint(constraint_name)  # type: ignore[attr-defined]
-                batch_op.create_check_constraint(constraint_name, sa.text(constraint))  # type: ignore
+                if old_column_size <= 32:
+                    # Constraint only exists for shorter strings.
+                    batch_op.drop_constraint(constraint_name)  # type: ignore[attr-defined]
+                if column_size <= 32:
+                    batch_op.create_check_constraint(constraint_name, sa.text(constraint))  # type: ignore
diff --git a/python/lsst/daf/butler_migrate/_dimensions_json_utils.py b/python/lsst/daf/butler_migrate/_dimensions_json_utils.py
@@ -22,6 +22,30 @@
 import difflib
 import json
 
+import yaml
+from lsst.resources import ResourcePath
+
+
+def historical_dimensions_resource(universe_version: int, namespace: str = "daf_butler") -> ResourcePath:
+    """Return location of the dimensions configuration for a specific version.
+
+    Parameters
+    ----------
+    universe_version : `int`
+        Version number of the universe to be loaded.
+    namespace : `str`, optional
+        Configuration namespace.
+
+    Returns
+    -------
+    path : `lsst.resources.ResourcePath`
+        Location of the configuration; there is no guarantee that this resource
+        actually exists.
+    """
+    return ResourcePath(
+        f"resource://lsst.daf.butler/configs/old_dimensions/{namespace}_universe{universe_version}.yaml"
+    )
+
 
 def load_historical_dimension_universe_json(universe_version: int) -> str:
     """Load a specific version of the default dimension universe as JSON.
@@ -36,12 +60,7 @@ def load_historical_dimension_universe_json(universe_version: int) -> str:
     universe : `str`
         Dimension universe configuration encoded as a JSON string.
     """
-    import yaml
-    from lsst.resources import ResourcePath
-
-    path = ResourcePath(
-        f"resource://lsst.daf.butler/configs/old_dimensions/daf_butler_universe{universe_version}.yaml"
-    )
+    path = historical_dimensions_resource(universe_version)
     with path.open() as input:
         dimensions = yaml.safe_load(input)
     return json.dumps(dimensions)

diff --git a/python/lsst/daf/butler_migrate/timespan.py b/python/lsst/daf/butler_migrate/timespan.py
@@ -77,7 +77,7 @@ def format_timespan_value(timespan: Timespan, column_name: str, dialect: str) ->
     values : `dict` [ `str`, `typing.Any` ]
         Mapping from column name to value for that column.
     """
-    nanoseconds = timespan.to_simple()
+    nanoseconds = timespan.nsec
     if dialect == "postgresql":
         return {column_name: Range(*nanoseconds)}
     elif dialect == "sqlite":