Skip to content

Commit

Permalink
Initial work on extending butler collections API
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Aug 14, 2024
1 parent 276c136 commit 07aadd4
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 32 deletions.
14 changes: 13 additions & 1 deletion python/lsst/daf/butler/_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,24 @@

from __future__ import annotations

__all__ = ("ButlerCollections",)
__all__ = ("ButlerCollections", "CollectionType", "CollectionInfo")

from abc import ABC, abstractmethod
from collections.abc import Iterable, Sequence
from typing import Any, overload

from pydantic import BaseModel

from ._collection_type import CollectionType


class CollectionInfo(BaseModel):
name: str
type: CollectionType
doc: str = ""
children: tuple[str, ...] = tuple()
parents: frozenset = frozenset()


class ButlerCollections(ABC, Sequence):
"""Methods for working with collections stored in the Butler."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@

from lsst.utils.iteration import ensure_iterable

from .._butler_collections import ButlerCollections
from .._butler_collections import ButlerCollections, CollectionInfo
from ..registry import CollectionType
from ..registry.interfaces import ChainedCollectionRecord
from ..registry.sql_registry import SqlRegistry


Expand Down Expand Up @@ -76,3 +78,33 @@ def remove_from_chain(
return self._registry._managers.collections.remove_from_collection_chain(
parent_collection_name, list(ensure_iterable(child_collection_names))
)

def query(
self,
expression: str | Iterable[str],
collection_types: set[CollectionType] | CollectionType | None = None,
flatten_chains: bool = False,
include_chains: bool | None = None,
) -> Sequence[str]:
if collection_types is None:
collection_types = CollectionType.all()

Check warning on line 90 in python/lsst/daf/butler/direct_butler/_direct_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_butler/_direct_butler_collections.py#L90

Added line #L90 was not covered by tests
return self._registry.queryCollections(
expression,
collectionTypes=collection_types,
flattenChains=flatten_chains,
includeChains=include_chains,
)

def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo:
record = self._registry.get_collection_record(name)
doc = ""
if include_doc:
doc = self._registry.getCollectionDocumentation(name) or ""

Check warning on line 102 in python/lsst/daf/butler/direct_butler/_direct_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/direct_butler/_direct_butler_collections.py#L102

Added line #L102 was not covered by tests
children: tuple[str, ...] = tuple()
if record.type == CollectionType.CHAINED:
assert isinstance(record, ChainedCollectionRecord)
children = tuple(record.children)
parents: set[str] = set()
if include_parents:
parents = self._registry.getCollectionParentChains(name)
return CollectionInfo(name=name, type=record.type, doc=doc, parents=parents, children=children)
94 changes: 94 additions & 0 deletions python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ("RemoteButlerCollections",)

from collections.abc import Iterable, Sequence

from .._butler_collections import ButlerCollections, CollectionInfo
from .._collection_type import CollectionType
from ._registry import RemoteButlerRegistry


class RemoteButlerCollections(ButlerCollections):

Check warning on line 39 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L39

Added line #L39 was not covered by tests
"""Implementation of ButlerCollections for RemoteButler.
Parameters
----------
registry : `~lsst.daf.butler.registry.sql_registry.SqlRegistry`
Registry object used to work with the collections database.
"""

def __init__(self, registry: RemoteButlerRegistry):
self._registry = registry

Check warning on line 49 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L48-L49

Added lines #L48 - L49 were not covered by tests

@property

Check warning on line 51 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L51

Added line #L51 was not covered by tests
def defaults(self) -> Sequence[str]:
return self._registry.defaults.collections

Check warning on line 53 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L53

Added line #L53 was not covered by tests

def extend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:

Check warning on line 55 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L55

Added line #L55 was not covered by tests
raise NotImplementedError("Not yet available")

def prepend_chain(self, parent_collection_name: str, child_collection_names: str | Iterable[str]) -> None:

Check warning on line 58 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L58

Added line #L58 was not covered by tests
raise NotImplementedError("Not yet available")

def redefine_chain(

Check warning on line 61 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L61

Added line #L61 was not covered by tests
self, parent_collection_name: str, child_collection_names: str | Iterable[str]
) -> None:
raise NotImplementedError("Not yet available")

def remove_from_chain(

Check warning on line 66 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L66

Added line #L66 was not covered by tests
self, parent_collection_name: str, child_collection_names: str | Iterable[str]
) -> None:
raise NotImplementedError("Not yet available")

def query(

Check warning on line 71 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L71

Added line #L71 was not covered by tests
self,
expression: str | Iterable[str],
collection_types: set[CollectionType] | CollectionType | None = None,
flatten_chains: bool = False,
include_chains: bool | None = None,
) -> Sequence[str]:
if collection_types is None:
collection_types = CollectionType.all()
return self._registry.queryCollections(

Check warning on line 80 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L79-L80

Added lines #L79 - L80 were not covered by tests
expression,
collectionTypes=collection_types,
flattenChains=flatten_chains,
includeChains=include_chains,
)

def get_info(self, name: str, include_doc: bool = False, include_parents: bool = False) -> CollectionInfo:
info = self._registry._get_collection_info(

Check warning on line 88 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L87-L88

Added lines #L87 - L88 were not covered by tests
name, include_doc=include_doc, include_parents=include_parents
)
doc = info.doc or ""
children = info.children or ()
parents = info.parents or set()
return CollectionInfo(name=name, type=info.type, doc=doc, parents=parents, children=children)

Check warning on line 94 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L91-L94

Added lines #L91 - L94 were not covered by tests
52 changes: 22 additions & 30 deletions python/lsst/daf/butler/script/queryCollections.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,38 +71,34 @@ def _getTable(
dtype=(str, str, str),
)
butler = Butler.from_config(repo)
names = sorted(
butler.registry.queryCollections(collectionTypes=frozenset(collection_type), expression=glob or ...)
)
names = sorted(butler.collections.query(glob or "*", collection_types=frozenset(collection_type)))
if inverse:
for name in names:
type = butler.registry.getCollectionType(name)
parentNames = butler.registry.getCollectionParentChains(name)
if parentNames:
info = butler.collections.get_info(name, include_parents=True)
if info.parents:
first = True
for parentName in sorted(parentNames):
table.add_row((name if first else "", type.name if first else "", parentName))
for parentName in sorted(info.parents):
table.add_row((name if first else "", info.type.name if first else "", parentName))
first = False
else:
table.add_row((name, type.name, ""))
table.add_row((name, info.type.name, ""))
# If none of the datasets has a parent dataset then remove the
# description column.
if not any(c for c in table[descriptionCol]):
del table[descriptionCol]
else:
for name in names:
type = butler.registry.getCollectionType(name)
if type == CollectionType.CHAINED:
children = butler.registry.getCollectionChain(name)
if children:
info = butler.collections.get_info(name)
if info.type == CollectionType.CHAINED:
if info.children:
first = True
for child in children:
table.add_row((name if first else "", type.name if first else "", child))
for child in info.children:
table.add_row((name if first else "", info.type.name if first else "", child))
first = False
else:
table.add_row((name, type.name, ""))
table.add_row((name, info.type.name, ""))

Check warning on line 99 in python/lsst/daf/butler/script/queryCollections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/script/queryCollections.py#L99

Added line #L99 was not covered by tests
else:
table.add_row((name, type.name, ""))
table.add_row((name, info.type.name, ""))
# If there aren't any CHAINED datasets in the results then remove the
# description column.
if not any(columnVal == CollectionType.CHAINED.name for columnVal in table[typeCol]):
Expand Down Expand Up @@ -147,21 +143,17 @@ def _getTree(
butler = Butler.from_config(repo, without_datastore=True)

def addCollection(name: str, level: int = 0) -> None:
collectionType = butler.registry.getCollectionType(name)
table.add_row((" " * level + name, collectionType.name))
info = butler.collections.get_info(name, include_parents=inverse)
table.add_row((" " * level + name, info.type.name))
if inverse:
parentNames = butler.registry.getCollectionParentChains(name)
for pname in sorted(parentNames):
for pname in sorted(info.parents):
addCollection(pname, level + 1)
else:
if collectionType == CollectionType.CHAINED:
childNames = butler.registry.getCollectionChain(name)
for name in childNames:
if info.type == CollectionType.CHAINED:
for name in info.children:
addCollection(name, level + 1)

collections = butler.registry.queryCollections(
collectionTypes=frozenset(collection_type), expression=glob or ...
)
collections = butler.collections.query(glob or "*", collection_types=frozenset(collection_type))
for collection in sorted(collections):
addCollection(collection)
return table
Expand All @@ -174,12 +166,12 @@ def _getFlatten(
) -> Table:
butler = Butler.from_config(repo)
collectionNames = list(
butler.registry.queryCollections(
collectionTypes=frozenset(collection_type), flattenChains=True, expression=glob or ...
butler.collections.query(
glob or "*", collection_types=frozenset(collection_type), flatten_chains=True
)
)

collectionTypes = [butler.registry.getCollectionType(c).name for c in collectionNames]
collectionTypes = [butler.collections.get_info(c).type.name for c in collectionNames]
return Table((collectionNames, collectionTypes), names=("Name", "Type"))


Expand Down

0 comments on commit 07aadd4

Please sign in to comment.