From 7804a91c485b57aa6f7a80ff620cec221beddc9f Mon Sep 17 00:00:00 2001 From: John Parejko Date: Wed, 31 Jul 2024 12:30:25 -0700 Subject: [PATCH 1/5] Support conversion of components with convertible parents --- python/lsst/daf/butler/_dataset_type.py | 5 +++++ tests/test_parquet.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/python/lsst/daf/butler/_dataset_type.py b/python/lsst/daf/butler/_dataset_type.py index 084cefdfd1..dd3a67508f 100644 --- a/python/lsst/daf/butler/_dataset_type.py +++ b/python/lsst/daf/butler/_dataset_type.py @@ -323,6 +323,11 @@ class for this dataset type that can convert the python type associated or the storage class associated with the other can be converted to this. """ + # Support conversion when the components have the same name and + # convertible parents, e.g. Parquet storage classes. + if self.component() is not None and self.component() == other.component(): + return self.makeCompositeDatasetType().is_compatible_with(other.makeCompositeDatasetType()) + mostly_equal = self._equal_ignoring_storage_class(other) if not mostly_equal: return False diff --git a/tests/test_parquet.py b/tests/test_parquet.py index 65d75dd405..f071414c3e 100644 --- a/tests/test_parquet.py +++ b/tests/test_parquet.py @@ -629,6 +629,14 @@ def testWriteSingleIndexDataFrameReadAsArrowTable(self): # We check the set because pandas reorders the columns. self.assertEqual(set(columns), set(columns2)) + # Check getting a component while overriding the storage class via + # the dataset type. This overrides the parent storage class and then + # selects the component. + columnsType = self.datasetType.overrideStorageClass("ArrowAstropy").makeComponentDatasetType( + "columns" + ) + self.assertEqual(columns2, self.butler.get(columnsType)) + # Check reading the schema. schema = tab2.schema schema2 = self.butler.get( From 83bf65a6955c002ddad275b0690655a324fe6ad6 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 1 Aug 2024 09:51:32 -0700 Subject: [PATCH 2/5] Stop checking parent storage class when checking components for compatibility Datastore requires that components are compatible. Parent compatibility is not important when the component python object is returned. --- python/lsst/daf/butler/_dataset_type.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/python/lsst/daf/butler/_dataset_type.py b/python/lsst/daf/butler/_dataset_type.py index dd3a67508f..9321ea67c6 100644 --- a/python/lsst/daf/butler/_dataset_type.py +++ b/python/lsst/daf/butler/_dataset_type.py @@ -284,10 +284,7 @@ def _equal_ignoring_storage_class(self, other: Any) -> bool: return False if self._isCalibration != other._isCalibration: return False - if self._parentStorageClass is not None and other._parentStorageClass is not None: - return self._parentStorageClass == other._parentStorageClass - else: - return self._parentStorageClassName == other._parentStorageClassName + return True def __eq__(self, other: Any) -> bool: mostly_equal = self._equal_ignoring_storage_class(other) @@ -296,6 +293,13 @@ def __eq__(self, other: Any) -> bool: # Be careful not to force a storage class to import the corresponding # python code. + if self._parentStorageClass is not None and other._parentStorageClass is not None: + if self._parentStorageClass != other._parentStorageClass: + return False + else: + if self._parentStorageClassName != other._parentStorageClassName: + return False + if self._storageClass is not None and other._storageClass is not None: if self._storageClass != other._storageClass: return False @@ -309,7 +313,8 @@ def is_compatible_with(self, other: DatasetType) -> bool: Compatibility requires a matching name and dimensions and a storage class for this dataset type that can convert the python type associated - with the other storage class to this python type. + with the other storage class to this python type. Parent storage class + compatibility is not checked at all for components. Parameters ---------- @@ -323,11 +328,6 @@ class for this dataset type that can convert the python type associated or the storage class associated with the other can be converted to this. """ - # Support conversion when the components have the same name and - # convertible parents, e.g. Parquet storage classes. - if self.component() is not None and self.component() == other.component(): - return self.makeCompositeDatasetType().is_compatible_with(other.makeCompositeDatasetType()) - mostly_equal = self._equal_ignoring_storage_class(other) if not mostly_equal: return False From 24d7d0035ada0e66a5b9b2ec670bb8223a4f4dc7 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 1 Aug 2024 09:56:11 -0700 Subject: [PATCH 3/5] Add test where component is overridden in dataset type --- tests/test_parquet.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_parquet.py b/tests/test_parquet.py index f071414c3e..351b90f94f 100644 --- a/tests/test_parquet.py +++ b/tests/test_parquet.py @@ -629,6 +629,12 @@ def testWriteSingleIndexDataFrameReadAsArrowTable(self): # We check the set because pandas reorders the columns. self.assertEqual(set(columns), set(columns2)) + # Override the component using a dataset type. + columnsType = self.datasetType.makeComponentDatasetType("columns").overrideStorageClass( + "ArrowColumnList" + ) + self.assertEqual(columns2, self.butler.get(columnsType)) + # Check getting a component while overriding the storage class via # the dataset type. This overrides the parent storage class and then # selects the component. From 25f4204dea8975fca9c8ade100c201d11e778e4b Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 1 Aug 2024 10:21:24 -0700 Subject: [PATCH 4/5] Add test that fails if parent storage classes are checked for compatibility --- tests/test_datasets.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 7ec8cb0947..fc13898cde 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -280,7 +280,7 @@ def testCompatibility(self) -> None: def testOverrideStorageClass(self) -> None: storageA = StorageClass("test_a", pytype=list, converters={"dict": "builtins.list"}) - storageB = StorageClass("test_b", pytype=dict) + storageB = StorageClass("test_b", pytype=dict, converters={"list": "dict"}) dimensions = self.universe.conform(["instrument"]) dA = DatasetType("a", dimensions, storageA) @@ -291,15 +291,27 @@ def testOverrideStorageClass(self) -> None: round_trip = dB.overrideStorageClass(storageA) self.assertEqual(round_trip, dA) - # Check that parents move over. - parent = StorageClass("composite", components={"a": storageA, "c": storageA}) + # Check that parents move over. Assign a pytype to avoid using + # object in later tests. + parent = StorageClass("composite", pytype=tuple, components={"a": storageA, "c": storageA}) dP = DatasetType("comp", dimensions, parent) dP_A = dP.makeComponentDatasetType("a") - print(dP_A) dp_B = dP_A.overrideStorageClass(storageB) self.assertEqual(dp_B.storageClass, storageB) self.assertEqual(dp_B.parentStorageClass, parent) + # Check that components are checked for compatibility but parents + # can be different. + parent2 = StorageClass( + "composite2", + pytype=frozenset, + components={"a": storageB, "c": storageB}, + ) + dP2 = DatasetType("comp", dimensions, parent2) + # Components are compatible even though parents aren't. + self.assertFalse(dP.is_compatible_with(dP2)) + self.assertTrue(dP2.makeComponentDatasetType("a").is_compatible_with(dP_A)) + def testJson(self) -> None: storageA = StorageClass("test_a") dimensionsA = self.universe.conform(["instrument"]) From 7570ecb0b0f3ba366e8059c082a3ee27cd0557e9 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 1 Aug 2024 10:26:03 -0700 Subject: [PATCH 5/5] Refresh precommit --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1d58d0a370..61ba814437 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,10 +23,10 @@ repos: name: isort (python) - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.5.4 + rev: v0.5.5 hooks: - id: ruff - repo: https://github.com/numpy/numpydoc - rev: "v1.7.0" + rev: "v1.8.0rc2" hooks: - id: numpydoc-validation