From 7faf7adb195773210ecbdd06ef66649510028c29 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 8 Jun 2023 12:21:44 -0700 Subject: [PATCH 01/10] Use butler.dimensions rather than deprecated butler.registry.dimensions --- python/lsst/pipe/base/executionButlerBuilder.py | 4 ++-- python/lsst/pipe/base/testUtils.py | 10 ++++------ python/lsst/pipe/base/tests/pipelineStepTester.py | 2 +- tests/test_config_formatter.py | 2 +- tests/test_pipelineTask.py | 9 ++++----- 5 files changed, 12 insertions(+), 15 deletions(-) diff --git a/python/lsst/pipe/base/executionButlerBuilder.py b/python/lsst/pipe/base/executionButlerBuilder.py index 65bf9b41b..048850db5 100644 --- a/python/lsst/pipe/base/executionButlerBuilder.py +++ b/python/lsst/pipe/base/executionButlerBuilder.py @@ -238,7 +238,7 @@ def _export(butler: Butler, collections: Optional[Iterable[str]], inserts: DataS # Yaml is hard coded, since the class controls both ends of the # export/import BackendClass = get_class_of(butler._config["repo_transfer_formats", "yaml", "export"]) - backend = BackendClass(yamlBuffer, universe=butler.registry.dimensions) + backend = BackendClass(yamlBuffer, universe=butler.dimensions) exporter = RepoExportContext(butler.registry, butler.datastore, backend, directory=None, transfer=None) # Need to ensure that the dimension records for outputs are @@ -324,7 +324,7 @@ def _setupNewButler( config = Butler.makeRepo( root=outputLocation, config=config, - dimensionConfig=butler.registry.dimensions.dimensionConfig, + dimensionConfig=butler.dimensions.dimensionConfig, overwrite=True, forceConfigRoot=False, ) diff --git a/python/lsst/pipe/base/testUtils.py b/python/lsst/pipe/base/testUtils.py index 22053945f..7b41fb652 100644 --- a/python/lsst/pipe/base/testUtils.py +++ b/python/lsst/pipe/base/testUtils.py @@ -91,7 +91,7 @@ def makeQuantum( connections = task.config.ConnectionsClass(config=task.config) # type: ignore try: - _checkDimensionsMatch(butler.registry.dimensions, connections.dimensions, dataId.keys()) + _checkDimensionsMatch(butler.dimensions, connections.dimensions, dataId.keys()) except ValueError as e: raise ValueError("Error in quantum dimensions.") from e @@ -119,7 +119,7 @@ def makeQuantum( raise ValueError(f"Error in connection {name}.") from e quantum = Quantum( taskClass=type(task), - dataId=DataCoordinate.standardize(dataId, universe=butler.registry.dimensions), + dataId=DataCoordinate.standardize(dataId, universe=butler.dimensions), inputs=inputs, outputs=outputs, ) @@ -254,7 +254,7 @@ def _refFromConnection( A reference to a dataset compatible with ``connection``, with ID ``dataId``, in the collection pointed to by ``butler``. 
""" - universe = butler.registry.dimensions + universe = butler.dimensions # DatasetRef only tests if required dimension is missing, but not extras _checkDimensionsMatch(universe, set(connection.dimensions), dataId.keys()) dataId = DataCoordinate.standardize(dataId, **kwargs, universe=universe) @@ -431,9 +431,7 @@ def getInitInputs(butler: Butler, config: PipelineTaskConfig) -> Dict[str, Any]: for name in connections.initInputs: attribute = getattr(connections, name) # Get full dataset type to check for consistency problems - dsType = DatasetType( - attribute.name, butler.registry.dimensions.extract(set()), attribute.storageClass - ) + dsType = DatasetType(attribute.name, butler.dimensions.extract(set()), attribute.storageClass) # All initInputs have empty data IDs initInputs[name] = butler.get(dsType) diff --git a/python/lsst/pipe/base/tests/pipelineStepTester.py b/python/lsst/pipe/base/tests/pipelineStepTester.py index 697ccac7f..22e08e7de 100644 --- a/python/lsst/pipe/base/tests/pipelineStepTester.py +++ b/python/lsst/pipe/base/tests/pipelineStepTester.py @@ -77,7 +77,7 @@ def register_dataset_types(self, butler: Butler) -> None: dimensions, storageClass=storageClass, isCalibration=isCalibration, - universe=butler.registry.dimensions, + universe=butler.dimensions, ) ) diff --git a/tests/test_config_formatter.py b/tests/test_config_formatter.py index 186a8bb9e..6cdd9ab4e 100644 --- a/tests/test_config_formatter.py +++ b/tests/test_config_formatter.py @@ -50,7 +50,7 @@ def setUp(self): # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType( - "config", dimensions=(), storageClass="Config", universe=self.butler.registry.dimensions + "config", dimensions=(), storageClass="Config", universe=self.butler.dimensions ) self.butler.registry.registerDatasetType(self.datasetType) diff --git a/tests/test_pipelineTask.py b/tests/test_pipelineTask.py index 06dcd9656..3960558c1 100644 --- a/tests/test_pipelineTask.py +++ b/tests/test_pipelineTask.py @@ -23,7 +23,6 @@ """ import unittest -from types import SimpleNamespace from typing import Any import lsst.pex.config as pexConfig @@ -38,7 +37,7 @@ class ButlerMock: def __init__(self) -> None: self.datasets: dict[str, dict[DataCoordinate, Any]] = {} - self.registry = SimpleNamespace(dimensions=DimensionUniverse()) + self.dimensions = DimensionUniverse() def get(self, ref: DatasetRef) -> Any: dsdata = self.datasets.get(ref.datasetType.name) @@ -151,7 +150,7 @@ def _testRunQuantum(self, full_butler: bool) -> None: quanta = self._makeQuanta(task.config) # add input data to butler - dstype0 = connections.input.makeDatasetType(butler.registry.dimensions) + dstype0 = connections.input.makeDatasetType(butler.dimensions) for i, quantum in enumerate(quanta): ref = quantum.inputs[dstype0.name][0] butler.put(100 + i, ref) @@ -231,7 +230,7 @@ def _testChain2(self, full_butler: bool) -> None: # add input data to butler task1Connections = task1.config.connections.ConnectionsClass(config=task1.config) - dstype0 = task1Connections.input.makeDatasetType(butler.registry.dimensions) + dstype0 = task1Connections.input.makeDatasetType(butler.dimensions) for i, quantum in enumerate(quanta1): ref = quantum.inputs[dstype0.name][0] butler.put(100 + i, ref) @@ -274,7 +273,7 @@ def testButlerQC(self): (quantum,) = self._makeQuanta(task.config, 1) # add input data to butler - dstype0 = connections.input.makeDatasetType(butler.registry.dimensions) + dstype0 = 
connections.input.makeDatasetType(butler.dimensions) ref = quantum.inputs[dstype0.name][0] butler.put(100, ref) From 6b93f97f59fef6233e9db136709b12d77ad0154d Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 10:58:39 -0700 Subject: [PATCH 02/10] Fix graph load numpydoc docstring to say nodes are UUID --- python/lsst/pipe/base/graph/graph.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/lsst/pipe/base/graph/graph.py b/python/lsst/pipe/base/graph/graph.py index a88ee20db..1df4bfaeb 100644 --- a/python/lsst/pipe/base/graph/graph.py +++ b/python/lsst/pipe/base/graph/graph.py @@ -912,8 +912,8 @@ def loadUri( If None it is loaded from the QuantumGraph saved structure. If supplied, the DimensionUniverse from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. - nodes: iterable of `int` or None - Numbers that correspond to nodes in the graph. If specified, only + nodes: iterable of `uuid.UUID` or None + UUIDs that correspond to nodes in the graph. If specified, only these nodes will be loaded. Defaults to None, in which case all nodes will be loaded. graphID : `str` or `None` @@ -1203,8 +1203,8 @@ def load( If None it is loaded from the QuantumGraph saved structure. If supplied, the DimensionUniverse from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. - nodes: iterable of `int` or None - Numbers that correspond to nodes in the graph. If specified, only + nodes: iterable of `uuid.UUID` or None + UUIDs that correspond to nodes in the graph. If specified, only these nodes will be loaded. Defaults to None, in which case all nodes will be loaded. graphID : `str` or `None` From 84dede1f7d6a4d132ccd75415174c837ea46d69c Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 12:38:15 -0700 Subject: [PATCH 03/10] Fix some docstring problems --- python/lsst/pipe/base/graph/graph.py | 221 ++++++++++++++------------- 1 file changed, 118 insertions(+), 103 deletions(-) diff --git a/python/lsst/pipe/base/graph/graph.py b/python/lsst/pipe/base/graph/graph.py index 1df4bfaeb..784cc4f99 100644 --- a/python/lsst/pipe/base/graph/graph.py +++ b/python/lsst/pipe/base/graph/graph.py @@ -106,31 +106,33 @@ class QuantumGraph: Parameters ---------- - quanta : Mapping of `TaskDef` to sets of `Quantum` + quanta : `~collections.abc.Mapping` [ `TaskDef`, \ + `set` [ `~lsst.daf.butler.Quantum` ] ] This maps tasks (and their configs) to the sets of data they are to process. - metadata : Optional Mapping of `str` to primitives + metadata : Optional `~collections.abc.Mapping` of `str` to primitives This is an optional parameter of extra data to carry with the graph. Entries in this mapping should be able to be serialized in JSON. - pruneRefs : iterable [ `DatasetRef` ], optional + pruneRefs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional Set of dataset refs to exclude from a graph. - universe : `lsst.daf.butler.DimensionUniverse`, optional + universe : `~lsst.daf.butler.DimensionUniverse`, optional The dimensions in which quanta can be defined. Need only be provided if no quanta have data IDs. - initInputs : `Mapping`, optional + initInputs : `~collections.abc.Mapping`, optional Maps tasks to their InitInput dataset refs. Dataset refs can be either resolved or non-resolved. Presently the same dataset refs are included - in each `Quantum` for the same task. - initOutputs : `Mapping`, optional + in each `~lsst.daf.butler.Quantum` for the same task. 
+ initOutputs : `~collections.abc.Mapping`, optional Maps tasks to their InitOutput dataset refs. Dataset refs can be either resolved or non-resolved. For intermediate resolved refs their dataset ID must match ``initInputs`` and Quantum ``initInputs``. - globalInitOutputs : iterable [ `DatasetRef` ], optional + globalInitOutputs : iterable [ `~lsst.daf.butler.DatasetRef` ], optional Dataset refs for some global objects produced by pipeline. These objects include task configurations and package versions. Typically they have an empty DataId, but there is no real restriction on what can appear here. - registryDatasetTypes : iterable [ `DatasetType` ], optional + registryDatasetTypes : iterable [ `~lsst.daf.butler.DatasetType` ], \ + optional Dataset types which are used by this graph, their definitions must match registry. If registry does not define dataset type yet, then it should match one that will be created later. @@ -388,7 +390,7 @@ def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T: Parameters ---------- - refs : `Iterable` of `DatasetRef` + refs : `Iterable` of `~lsst.daf.butler.DatasetRef` Refs which should be removed from resulting graph Returns @@ -440,49 +442,54 @@ def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode: return self._nodeIdMap[nodeId] def getQuantaForTask(self, taskDef: TaskDef) -> FrozenSet[Quantum]: - """Return all the `Quantum` associated with a `TaskDef`. + """Return all the `~lsst.daf.butler.Quantum` associated with a + `TaskDef`. Parameters ---------- taskDef : `TaskDef` - The `TaskDef` for which `Quantum` are to be queried + The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be + queried. Returns ------- - frozenset of `Quantum` - The `set` of `Quantum` that is associated with the specified - `TaskDef`. + frozenset of `~lsst.daf.butler.Quantum` + The `set` of `~lsst.daf.butler.Quantum` that is associated with the + specified `TaskDef`. """ return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ())) def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int: - """Return all the number of `Quantum` associated with a `TaskDef`. + """Return all the number of `~lsst.daf.butler.Quantum` associated with + a `TaskDef`. Parameters ---------- taskDef : `TaskDef` - The `TaskDef` for which `Quantum` are to be queried + The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be + queried. Returns ------- - count : int - The number of `Quantum` that are associated with the specified - `TaskDef`. + count : `int` + The number of `~lsst.daf.butler.Quantum` that are associated with + the specified `TaskDef`. """ return len(self._taskToQuantumNode.get(taskDef, ())) def getNodesForTask(self, taskDef: TaskDef) -> FrozenSet[QuantumNode]: - """Return all the `QuantumNodes` associated with a `TaskDef`. + r"""Return all the `QuantumNode`\s associated with a `TaskDef`. Parameters ---------- taskDef : `TaskDef` - The `TaskDef` for which `Quantum` are to be queried + The `TaskDef` for which `~lsst.daf.butler.Quantum` are to be + queried. Returns ------- - frozenset of `QuantumNodes` - The `frozenset` of `QuantumNodes` that is associated with the + nodes : `frozenset` [ `QuantumNode` ] + A `frozenset` of `QuantumNode` that is associated with the specified `TaskDef`. 
""" return frozenset(self._taskToQuantumNode[taskDef]) @@ -495,8 +502,8 @@ def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskD ---------- datasetTypeName : `str` A string representing the name of a dataset type to be queried, - can also accept a `DatasetTypeName` which is a `NewType` of str for - type safety in static type checking. + can also accept a `DatasetTypeName` which is a `~typing.NewType` of + `str` for type safety in static type checking. Returns ------- @@ -508,7 +515,7 @@ def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskD Raises ------ KeyError - Raised if the `DatasetTypeName` is not part of the `QuantumGraph` + Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. """ return (c for c in self._datasetDict.getConsumers(datasetTypeName)) @@ -520,19 +527,19 @@ def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> Optional[TaskD ---------- datasetTypeName : `str` A string representing the name of a dataset type to be queried, - can also accept a `DatasetTypeName` which is a `NewType` of str for - type safety in static type checking. + can also accept a `DatasetTypeName` which is a `~typing.NewType` of + `str` for type safety in static type checking. Returns ------- - `TaskDef` or `None` + result : `TaskDef` or `None` `TaskDef` that outputs `DatasetTypeName` as an output or None if none of the tasks produce this `DatasetTypeName`. Raises ------ KeyError - Raised if the `DatasetTypeName` is not part of the `QuantumGraph` + Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. """ return self._datasetDict.getProducer(datasetTypeName) @@ -544,38 +551,38 @@ def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef] ---------- datasetTypeName : `str` A string representing the name of a dataset type to be queried, - can also accept a `DatasetTypeName` which is a `NewType` of str for - type safety in static type checking. + can also accept a `DatasetTypeName` which is a `~typing.NewType` of + `str` for type safety in static type checking. Returns ------- result : iterable of `TaskDef` `TaskDef` objects that are associated with the specified - `DatasetTypeName` + `DatasetTypeName`. Raises ------ KeyError - Raised if the `DatasetTypeName` is not part of the `QuantumGraph` + Raised if the `DatasetTypeName` is not part of the `QuantumGraph`. """ return self._datasetDict.getAll(datasetTypeName) def findTaskDefByName(self, taskName: str) -> List[TaskDef]: """Determine which `TaskDef` objects in this graph are associated - with a `str` representing a task name (looks at the taskName property - of `TaskDef` objects). + with a `str` representing a task name (looks at the ``taskName`` + property of `TaskDef` objects). Returns a list of `TaskDef` objects as a `PipelineTask` may appear multiple times in a graph with different labels. Parameters ---------- - taskName : str - Name of a task to search for + taskName : `str` + Name of a task to search for. Returns ------- - result : list of `TaskDef` + result : `list` of `TaskDef` List of the `TaskDef` objects that have the name specified. Multiple values are returned in the case that a task is used multiple times with different labels. 
@@ -593,7 +600,7 @@ def findTaskDefByLabel(self, label: str) -> Optional[TaskDef]: Parameters ---------- - taskName : str + taskName : `str` Name of a task to search for Returns @@ -607,19 +614,21 @@ def findTaskDefByLabel(self, label: str) -> Optional[TaskDef]: return None def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> Set[Quantum]: - """Return all the `Quantum` that contain a specified `DatasetTypeName`. + r"""Return all the `~lsst.daf.butler.Quantum` that contain a specified + `DatasetTypeName`. Parameters ---------- datasetTypeName : `str` The name of the dataset type to search for as a string, - can also accept a `DatasetTypeName` which is a `NewType` of str for - type safety in static type checking. + can also accept a `DatasetTypeName` which is a `~typing.NewType` of + `str` for type safety in static type checking. Returns ------- result : `set` of `QuantumNode` objects - A `set` of `QuantumNode`s that contain specified `DatasetTypeName` + A `set` of `QuantumNode`\s that contain specified + `DatasetTypeName`. Raises ------ @@ -637,13 +646,13 @@ def checkQuantumInGraph(self, quantum: Quantum) -> bool: Parameters ---------- - quantum : `Quantum` - The quantum to search for + quantum : `lsst.daf.butler.Quantum` + The quantum to search for. Returns ------- - `bool` - The result of searching for the quantum + in_graph : `bool` + The result of searching for the quantum. """ for node in self: if quantum == node.quantum: @@ -655,8 +664,8 @@ def writeDotGraph(self, output: Union[str, io.BufferedIOBase]) -> None: Parameters ---------- - output : str or `io.BufferedIOBase` - Either a filesystem path to write to, or a file handle object + output : `str` or `io.BufferedIOBase` + Either a filesystem path to write to, or a file handle object. """ write_dot(self._connectedQuanta, output) @@ -667,11 +676,12 @@ def subset(self: _T, nodes: Union[QuantumNode, Iterable[QuantumNode]]) -> _T: Parameters ---------- nodes : `QuantumNode` or iterable of `QuantumNode` + Nodes from which to create subset. Returns ------- graph : instance of graph type - An instance of the type from which the subset was created + An instance of the type from which the subset was created. """ if not isinstance(nodes, Iterable): nodes = (nodes,) @@ -719,8 +729,8 @@ def subsetToConnected(self: _T) -> Tuple[_T, ...]: Returns ------- - result : list of `QuantumGraph` - A list of graphs that are each connected + result : `list` of `QuantumGraph` + A list of graphs that are each connected. """ return tuple( self.subset(connectedSet) @@ -734,12 +744,12 @@ def determineInputsToQuantumNode(self, node: QuantumNode) -> Set[QuantumNode]: Parameters ---------- node : `QuantumNode` - The node of the graph for which inputs are to be determined + The node of the graph for which inputs are to be determined. Returns ------- - set of `QuantumNode` - All the nodes that are direct inputs to specified node + inputs : `set` of `QuantumNode` + All the nodes that are direct inputs to specified node. """ return set(pred for pred in self._connectedQuanta.predecessors(node)) @@ -750,12 +760,12 @@ def determineOutputsOfQuantumNode(self, node: QuantumNode) -> Set[QuantumNode]: Parameters ---------- node : `QuantumNode` - The node of the graph for which outputs are to be determined + The node of the graph for which outputs are to be determined. Returns ------- - set of `QuantumNode` - All the nodes that are direct outputs to specified node + outputs : `set` of `QuantumNode` + All the nodes that are direct outputs to specified node. 
""" return set(succ for succ in self._connectedQuanta.successors(node)) @@ -772,7 +782,7 @@ def determineConnectionsOfQuantumNode(self: _T, node: QuantumNode) -> _T: Returns ------- graph : graph of `QuantumNode` - All the nodes that are directly connected to specified node + All the nodes that are directly connected to specified node. """ nodes = self.determineInputsToQuantumNode(node).union(self.determineOutputsOfQuantumNode(node)) nodes.add(node) @@ -785,12 +795,12 @@ def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T: Parameters ---------- node : `QuantumNode` - The node for which all ansestors are to be determined + The node for which all ancestors are to be determined Returns ------- - graph of `QuantumNode` - Graph of node and all of its ansestors + ancestors : graph of `QuantumNode` + Graph of node and all of its ancestors. """ predecessorNodes = nx.ancestors(self._connectedQuanta, node) predecessorNodes.add(node) @@ -802,7 +812,7 @@ def findCycle(self) -> List[Tuple[QuantumNode, QuantumNode]]: Returns ------- - result : list of tuple of `QuantumNode`, `QuantumNode` + result : `list` of `tuple` of [ `QuantumNode`, `QuantumNode` ] A list of any graph edges that form a cycle, or an empty list if there is no cycle. Empty list to so support if graph.find_cycle() syntax as an empty list is falsy. @@ -817,7 +827,7 @@ def saveUri(self, uri: ResourcePathExpression) -> None: Parameters ---------- - uri : convertible to `ResourcePath` + uri : convertible to `~lsst.resources.ResourcePath` URI to where the graph should be saved. """ buffer = self._buildSaveObject() @@ -847,7 +857,7 @@ def initInputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: Returns ------- - refs : `list` [ `DatasetRef` ] or None + refs : `list` [ `lsst.daf.butler.DatasetRef` ] or None DatasetRef for the task InitInput, can be `None`. This can return either resolved or non-resolved reference. """ @@ -863,7 +873,7 @@ def initOutputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: Returns ------- - refs : `list` [ `DatasetRef` ] or None + refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or None DatasetRefs for the task InitOutput, can be `None`. This can return either resolved or non-resolved reference. Resolved reference will match Quantum's initInputs if this is an intermediate dataset type. @@ -875,7 +885,7 @@ def globalInitOutputRefs(self) -> List[DatasetRef]: Returns ------- - refs : `list` [ `DatasetRef` ] + refs : `list` [ `~lsst.daf.butler.DatasetRef` ] DatasetRefs for global InitOutputs. """ return self._globalInitOutputRefs @@ -886,7 +896,7 @@ def registryDatasetTypes(self) -> List[DatasetType]: Returns ------- - refs : `list` [ `DatasetType` ] + refs : `list` [ `~lsst.daf.butler.DatasetType` ] Dataset types for this graph. """ return self._registryDatasetTypes @@ -904,15 +914,16 @@ def loadUri( Parameters ---------- - uri : convertible to `ResourcePath` + uri : convertible to `~lsst.resources.ResourcePath` URI from where to load the graph. - universe: `~lsst.daf.butler.DimensionUniverse` optional - DimensionUniverse instance, not used by the method itself but - needed to ensure that registry data structures are initialized. - If None it is loaded from the QuantumGraph saved structure. If - supplied, the DimensionUniverse from the loaded `QuantumGraph` + universe : `~lsst.daf.butler.DimensionUniverse` optional + `~lsst.daf.butler.DimensionUniverse` instance, not used by the + method itself but needed to ensure that registry data structures + are initialized. 
If `None` it is loaded from the `QuantumGraph` + saved structure. If supplied, the + `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. - nodes: iterable of `uuid.UUID` or None + nodes : iterable of `uuid.UUID` or None UUIDs that correspond to nodes in the graph. If specified, only these nodes will be loaded. Defaults to None, in which case all nodes will be loaded. @@ -920,7 +931,7 @@ def loadUri( If specified this ID is verified against the loaded graph prior to loading any Nodes. This defaults to None in which case no validation is done. - minimumVersion : int + minimumVersion : `int` Minimum version of a save file to load. Set to -1 to load all versions. Older versions may need to be loaded, and re-saved to upgrade them to the latest format before they can be used in @@ -935,23 +946,24 @@ def loadUri( ------ TypeError Raised if pickle contains instance of a type other than - QuantumGraph. + `QuantumGraph`. ValueError Raised if one or more of the nodes requested is not in the `QuantumGraph` or if graphID parameter does not match the graph being loaded or if the supplied uri does not point at a valid `QuantumGraph` save file. RuntimeError - Raise if Supplied DimensionUniverse is not compatible with the - DimensionUniverse saved in the graph - + Raise if Supplied `~lsst.daf.butler.DimensionUniverse` is not + compatible with the `~lsst.daf.butler.DimensionUniverse` saved in + the graph. Notes ----- Reading Quanta from pickle requires existence of singleton - DimensionUniverse which is usually instantiated during Registry - initialization. To make sure that DimensionUniverse exists this method - accepts dummy DimensionUniverse argument. + `~lsst.daf.butler.DimensionUniverse` which is usually instantiated + during `~lsst.daf.butler.Registry` initialization. To make sure + that `~lsst.daf.butler.DimensionUniverse` exists this method + accepts dummy `~lsst.daf.butler.DimensionUniverse` argument. """ uri = ResourcePath(uri) # With ResourcePath we have the choice of always using a local file @@ -980,10 +992,11 @@ def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> Opt Parameters ---------- - uri : convertible to `ResourcePath` + uri : convertible to `~lsst.resources.ResourcePath` The location of the `QuantumGraph` to load. If the argument is a - string, it must correspond to a valid `ResourcePath` path. - minimumVersion : int + string, it must correspond to a valid + `~lsst.resources.ResourcePath` path. + minimumVersion : `int` Minimum version of a save file to load. Set to -1 to load all versions. Older versions may need to be loaded, and re-saved to upgrade them to the latest format before they can be used in @@ -998,9 +1011,9 @@ def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> Opt Raises ------ ValueError - Raised if `QuantuGraph` was saved as a pickle. - Raised if the extention of the file specified by uri is not a - `QuantumGraph` extention. + Raised if `QuantumGraph` was saved as a pickle. + Raised if the extension of the file specified by uri is not a + `QuantumGraph` extension. """ uri = ResourcePath(uri) if uri.getExtension() in (".pickle", ".pkl"): @@ -1191,19 +1204,20 @@ def load( graphID: Optional[BuildId] = None, minimumVersion: int = 3, ) -> QuantumGraph: - """Read QuantumGraph from a file that was made by `save`. + """Read `QuantumGraph` from a file that was made by `save`. 
Parameters ---------- file : `io.IO` of bytes File with pickle data open in binary mode. - universe: `~lsst.daf.butler.DimensionUniverse`, optional - DimensionUniverse instance, not used by the method itself but - needed to ensure that registry data structures are initialized. - If None it is loaded from the QuantumGraph saved structure. If - supplied, the DimensionUniverse from the loaded `QuantumGraph` + universe : `~lsst.daf.butler.DimensionUniverse`, optional + `~lsst.daf.butler.DimensionUniverse` instance, not used by the + method itself but needed to ensure that registry data structures + are initialized. If `None` it is loaded from the `QuantumGraph` + saved structure. If supplied, the + `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. - nodes: iterable of `uuid.UUID` or None + nodes : iterable of `uuid.UUID` or None UUIDs that correspond to nodes in the graph. If specified, only these nodes will be loaded. Defaults to None, in which case all nodes will be loaded. @@ -1211,7 +1225,7 @@ def load( If specified this ID is verified against the loaded graph prior to loading any Nodes. This defaults to None in which case no validation is done. - minimumVersion : int + minimumVersion : `int` Minimum version of a save file to load. Set to -1 to load all versions. Older versions may need to be loaded, and re-saved to upgrade them to the latest format before they can be used in @@ -1226,7 +1240,7 @@ def load( ------ TypeError Raised if pickle contains instance of a type other than - QuantumGraph. + `QuantumGraph`. ValueError Raised if one or more of the nodes requested is not in the `QuantumGraph` or if graphID parameter does not match the graph @@ -1236,9 +1250,10 @@ def load( Notes ----- Reading Quanta from pickle requires existence of singleton - DimensionUniverse which is usually instantiated during Registry - initialization. To make sure that DimensionUniverse exists this method - accepts dummy DimensionUniverse argument. + `~lsst.daf.butler.DimensionUniverse` which is usually instantiated + during `~lsst.daf.butler.Registry` initialization. To make sure that + `~lsst.daf.butler.DimensionUniverse` exists this method accepts dummy + `~lsst.daf.butler.DimensionUniverse` argument. 
""" # Try to see if the file handle contains pickle data, this will be # removed in the future From c5a057cf4b43fb7c293d823f3e8d6ce1f27f0a6a Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 14:08:00 -0700 Subject: [PATCH 04/10] Fixes to docstrings motivated by pydocstyle --- .../pipe/base/_datasetQueryConstraints.py | 10 +- python/lsst/pipe/base/_dataset_handle.py | 16 +-- .../base/_observation_dimension_packer.py | 2 + python/lsst/pipe/base/butlerQuantumContext.py | 29 ++--- python/lsst/pipe/base/config.py | 3 +- python/lsst/pipe/base/configOverrides.py | 36 +++--- python/lsst/pipe/base/connectionTypes.py | 15 ++- python/lsst/pipe/base/connections.py | 71 +++++++----- .../lsst/pipe/base/executionButlerBuilder.py | 31 ++--- python/lsst/pipe/base/formatters/pexConfig.py | 3 +- python/lsst/pipe/base/graph/_implDetails.py | 106 +++++++++--------- python/lsst/pipe/base/graph/_loadHelpers.py | 24 ++-- .../pipe/base/graph/_versionDeserializers.py | 25 +++-- python/lsst/pipe/base/graph/graph.py | 14 ++- python/lsst/pipe/base/graph/quantumNode.py | 6 +- python/lsst/pipe/base/graphBuilder.py | 15 ++- python/lsst/pipe/base/pipeTools.py | 7 +- python/lsst/pipe/base/pipeline.py | 39 +++---- python/lsst/pipe/base/pipelineIR.py | 63 ++++++----- python/lsst/pipe/base/pipelineTask.py | 6 +- .../pipe/base/script/transfer_from_graph.py | 1 - python/lsst/pipe/base/task.py | 16 +-- .../pipe/base/tests/mocks/_data_id_match.py | 8 +- .../pipe/base/tests/mocks/_pipeline_task.py | 2 + python/lsst/pipe/base/tests/no_dimensions.py | 4 + python/lsst/pipe/base/tests/simpleQGraph.py | 10 +- python/lsst/pipe/base/tests/util.py | 6 +- 27 files changed, 307 insertions(+), 261 deletions(-) diff --git a/python/lsst/pipe/base/_datasetQueryConstraints.py b/python/lsst/pipe/base/_datasetQueryConstraints.py index 166cf14fb..64c910793 100644 --- a/python/lsst/pipe/base/_datasetQueryConstraints.py +++ b/python/lsst/pipe/base/_datasetQueryConstraints.py @@ -31,13 +31,13 @@ class DatasetQueryConstraintVariant(Iterable, Protocol): - """This class is the base for all the valid variants for controling + """Base for all the valid variants for controlling constraining graph building queries based on dataset type existence. ALL variant corresponds to using all input dataset types to constrain a query. - OFF variant corrresponds to not using any dataset types to constrain a + OFF variant corresponds to not using any dataset types to constrain a graph building query. LIST variant should be used when one or more specific names should be used @@ -69,9 +69,9 @@ def fromExpression(cls, expression: str) -> "DatasetQueryConstraintVariant": """Select and return the correct Variant that corresponds to the input expression. - Valid values are `all` for all inputs dataset types in pipeline, `off` - to not consider dataset type existence as a constraint, single or comma - seperated list of dataset type names. + Valid values are ``all`` for all inputs dataset types in pipeline, + ``off`` to not consider dataset type existence as a constraint, single + or comma-separated list of dataset type names. 
""" if not isinstance(expression, str): raise ValueError("Expression must be a string") diff --git a/python/lsst/pipe/base/_dataset_handle.py b/python/lsst/pipe/base/_dataset_handle.py index 34063dce3..db230a463 100644 --- a/python/lsst/pipe/base/_dataset_handle.py +++ b/python/lsst/pipe/base/_dataset_handle.py @@ -89,7 +89,7 @@ def get( parameters: Optional[dict] = None, storageClass: str | StorageClass | None = None, ) -> Any: - """Retrieves the dataset pointed to by this handle + """Retrieve the dataset pointed to by this handle. This handle may be used multiple times, possibly with different parameters. @@ -101,14 +101,14 @@ def get( may specify the name of the component to use in the get operation. parameters : `dict` or None The parameters argument will be passed to the butler get method. - It defaults to None. If the value is not None, this dict will + It defaults to `None`. If the value is not `None`, this `dict` will be merged with the parameters dict used to construct the - `DeferredDatasetHandle` class. - storageClass : `StorageClass` or `str`, optional + `~lsst.daf.butler.DeferredDatasetHandle` class. + storageClass : `~lsst.daf.butler.StorageClass` or `str`, optional The storage class to be used to override the Python type returned by this method. By default the returned type matches - the type stored. Specifying a read `StorageClass` can force a - different type to be returned. + the type stored. Specifying a read `~lsst.daf.butler.StorageClass` + can force a different type to be returned. This type must be compatible with the original type. Returns @@ -218,7 +218,7 @@ def _getStorageClass(self) -> StorageClass: Returns ------- - storageClass : `StorageClass` + storageClass : `~lsst.daf.butler.StorageClass` The storage class associated with this handle, or one derived from the python type of the stored object. @@ -258,4 +258,4 @@ def _getStorageClass(self) -> StorageClass: copy: bool = False """Control whether a copy of the in-memory dataset is returned for every - call to get().""" + call to `get()`.""" diff --git a/python/lsst/pipe/base/_observation_dimension_packer.py b/python/lsst/pipe/base/_observation_dimension_packer.py index b59ec3ed5..b915b6005 100644 --- a/python/lsst/pipe/base/_observation_dimension_packer.py +++ b/python/lsst/pipe/base/_observation_dimension_packer.py @@ -34,6 +34,8 @@ class ObservationDimensionPackerConfig(Config): + """Config associated with a `ObservationDimensionPacker`.""" + # Config fields are annotated as Any because support for better # annotations is broken on Fields with optional=True. n_detectors: Any = Field( diff --git a/python/lsst/pipe/base/butlerQuantumContext.py b/python/lsst/pipe/base/butlerQuantumContext.py index 373055710..aa50d112e 100644 --- a/python/lsst/pipe/base/butlerQuantumContext.py +++ b/python/lsst/pipe/base/butlerQuantumContext.py @@ -102,7 +102,7 @@ def get( None, ], ) -> Any: - """Fetches data from the butler + """Fetch data from the butler Parameters ---------- @@ -200,7 +200,7 @@ def put( values: Union[Struct, List[Any], Any], dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef], ) -> None: - """Puts data into the butler + """Put data into the butler. Parameters ---------- @@ -226,9 +226,9 @@ def put( Raises ------ ValueError - Raised if a `DatasetRef` is passed to put that is not defined in - the quantum object, or the type of values does not match what is - expected from the type of dataset. 
+ Raised if a `~lsst.daf.butler.DatasetRef` is passed to put that is + not defined in the `~lsst.daf.butler.Quantum` object, or the type + of values does not match what is expected from the type of dataset. """ if isinstance(dataset, OutputQuantizedConnection): if not isinstance(values, Struct): @@ -258,20 +258,23 @@ def put( raise TypeError("Dataset argument is not a type that can be used to put") def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None: - """Internal function used to check if a DatasetRef is part of the input - quantum + """Check if a `~lsst.daf.butler.DatasetRef` is part of the input + `~lsst.daf.butler.Quantum`. - This function will raise an exception if the ButlerQuantumContext is - used to get/put a DatasetRef which is not defined in the quantum. + This function will raise an exception if the `ButlerQuantumContext` is + used to get/put a `~lsst.daf.butler.DatasetRef` which is not defined + in the quantum. Parameters ---------- - ref : `list` of `DatasetRef` or `DatasetRef` - Either a list or a single `DatasetRef` to check + ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \ + `~lsst.daf.butler.DatasetRef` + Either a `list` or a single `DatasetRef` to check inout : `set` The connection type to check, e.g. either an input or an output. This prevents both types needing to be checked for every operation, - which may be important for Quanta with lots of `DatasetRef`. + which may be important for Quanta with lots of + `~lsst.daf.butler.DatasetRef`. """ if not isinstance(ref, list): ref = [ref] @@ -282,6 +285,6 @@ def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) @property def dimensions(self) -> DimensionUniverse: """Structure managing all dimensions recognized by this data - repository (`DimensionUniverse`). + repository (`~lsst.daf.butler.DimensionUniverse`). """ return self.__butler.dimensions diff --git a/python/lsst/pipe/base/config.py b/python/lsst/pipe/base/config.py index 1762431cf..cfdc4917d 100644 --- a/python/lsst/pipe/base/config.py +++ b/python/lsst/pipe/base/config.py @@ -59,7 +59,8 @@ class TemplateField(pexConfig.Field): - """This Field is specialized for use with connection templates. + """Field specialized for use with connection templates. + Specifically it treats strings or numbers as valid input, as occasionally numbers are used as a cycle counter in templates. diff --git a/python/lsst/pipe/base/configOverrides.py b/python/lsst/pipe/base/configOverrides.py index a02026542..3c49ed2b0 100644 --- a/python/lsst/pipe/base/configOverrides.py +++ b/python/lsst/pipe/base/configOverrides.py @@ -61,9 +61,7 @@ def __init__(self, namespace): self.variables = namespace def visit_Name(self, node): - """This method gets called when the parser has determined a node - corresponds to a variable name. - """ + """Handle a node corresponding to a variable name.""" # If the id (name) of the variable is in the dictionary of valid names, # load and return the corresponding variable. if node.id in self.variables: @@ -73,35 +71,33 @@ def visit_Name(self, node): return f"{node.id}" def visit_List(self, node): - """This method is visited if the node is a list. Constructs a list out - of the sub nodes. + """Build a list out of the sub nodes when a list node is + encountered. """ return [self.visit(elm) for elm in node.elts] def visit_Tuple(self, node): - """This method is visited if the node is a tuple. Constructs a list out - of the sub nodes, and then turns it into a tuple. 
+ """Build a list out of the sub nodes and then turn it into a + tuple. """ return tuple(self.visit_List(node)) def visit_Constant(self, node): - """This method is visited if the node is a constant""" + """Return constant from node.""" return node.value def visit_Dict(self, node): - """This method is visited if the node is a dict. It builds a dict out - of the component nodes. - """ + """Build dict out of component nodes if dict node encountered.""" return {self.visit(key): self.visit(value) for key, value in zip(node.keys, node.values)} def visit_Set(self, node): - """This method is visited if the node is a set. It builds a set out - of the component nodes. - """ + """Build set out of node is set encountered.""" return {self.visit(el) for el in node.elts} def visit_UnaryOp(self, node): - """This method is visited if the node is a unary operator. Currently + """Handle unary operators. + + This method is visited if the node is a unary operator. Currently The only operator we support is the negative (-) operator, all others are passed to generic_visit method. """ @@ -111,7 +107,9 @@ def visit_UnaryOp(self, node): self.generic_visit(node) def generic_visit(self, node): - """This method is called for all other node types. It will just raise + """Handle other node types. + + This method is called for all other node types. It will just raise a value error, because this is a type of expression that we do not support. """ @@ -127,7 +125,7 @@ class ConfigOverrides: or some other configuration). Methods - ---------- + ------- addFileOverride(filename) Add overrides from a specified file. addValueOverride(field, value) @@ -149,9 +147,9 @@ def addFileOverride(self, filename): Parameters ---------- - filename : convertible to `ResourcePath` + filename : convertible to `~lsst.resources.ResourcePath` Path or URI to the override file. All URI schemes supported by - `ResourcePath` are supported. + `~lsst.resources.ResourcePath` are supported. """ self._overrides.append((OverrideTypes.File, ResourcePath(filename))) diff --git a/python/lsst/pipe/base/connectionTypes.py b/python/lsst/pipe/base/connectionTypes.py index 0d75910ae..036901117 100644 --- a/python/lsst/pipe/base/connectionTypes.py +++ b/python/lsst/pipe/base/connectionTypes.py @@ -47,9 +47,9 @@ class BaseConnection: of the given dataset type. Tasks with more than one connection with ``multiple=True`` with the same dimensions may want to implement `PipelineTaskConnections.adjustQuantum` to ensure those datasets are - consistent (i.e. zip-iterable) in `PipelineTask.runQuantum` and notify - the execution system as early as possible of outputs that will not be - produced because the corresponding input is missing. + consistent (i.e. zip-iterable) in `PipelineTask.runQuantum()` and + notify the execution system as early as possible of outputs that will + not be produced because the corresponding input is missing. """ name: str @@ -296,7 +296,7 @@ def __post_init__(self) -> None: @dataclasses.dataclass(frozen=True) class PrerequisiteInput(BaseInput): - """Class used for declaring PipelineTask prerequisite connections + """Class used for declaring PipelineTask prerequisite connections. Parameters ---------- @@ -359,7 +359,6 @@ class PrerequisiteInput(BaseInput): in each visit for which the visit overlaps a tract, not just those where that detector+visit combination overlaps the tract). - Prerequisite inputs may be optional (regular inputs are never optional). 
- """ lookupFunction: Callable[ @@ -371,14 +370,20 @@ class PrerequisiteInput(BaseInput): @dataclasses.dataclass(frozen=True) class Output(DimensionedConnection): + """Connection for output dataset.""" + _connection_type_set: ClassVar[str] = "outputs" @dataclasses.dataclass(frozen=True) class InitInput(BaseConnection): + """Connection for initInput dataset.""" + _connection_type_set: ClassVar[str] = "initInputs" @dataclasses.dataclass(frozen=True) class InitOutput(BaseConnection): + """Connection for initOutput dataset.""" + _connection_type_set: ClassVar[str] = "initOutputs" diff --git a/python/lsst/pipe/base/connections.py b/python/lsst/pipe/base/connections.py index 07105d7f0..6789b9210 100644 --- a/python/lsst/pipe/base/connections.py +++ b/python/lsst/pipe/base/connections.py @@ -60,7 +60,7 @@ class ScalarError(TypeError): class PipelineTaskConnectionDict(UserDict): - """This is a special dict class used by PipelineTaskConnectionMetaclass + """A special dict class used by PipelineTaskConnectionMetaclass This dict is used in PipelineTaskConnection class creation, as the dictionary that is initially used as __dict__. It exists to @@ -393,9 +393,9 @@ def __len__(self) -> int: def __iter__( self, ) -> Generator[tuple[str, DatasetRef | list[DatasetRef]], None, None]: - """Make an Iterator for this QuantizedConnection + """Make an iterator for this `QuantizedConnection`. - Iterating over a QuantizedConnection will yield a tuple with the name + Iterating over a `QuantizedConnection` will yield a tuple with the name of an attribute and the value associated with that name. This is similar to dict.items() but is on the namespace attributes rather than dict keys. @@ -403,24 +403,29 @@ def __iter__( yield from ((name, getattr(self, name)) for name in self._attributes) def keys(self) -> Generator[str, None, None]: - """Returns an iterator over all the attributes added to a - QuantizedConnection class + """Return an iterator over all the attributes added to a + `QuantizedConnection` class """ yield from self._attributes class InputQuantizedConnection(QuantizedConnection): + """Input variant of a `QuantizedConnection`.""" + pass class OutputQuantizedConnection(QuantizedConnection): + """Output variant of a `QuantizedConnection`.""" + pass @dataclass(frozen=True) class DeferredDatasetRef: - """A wrapper class for `DatasetRef` that indicates that a `PipelineTask` - should receive a `DeferredDatasetHandle` instead of an in-memory dataset. + """A wrapper class for `~lsst.daf.butler.DatasetRef` that indicates that a + `PipelineTask` should receive a `~lsst.daf.butler.DeferredDatasetHandle` + instead of an in-memory dataset. Parameters ---------- @@ -452,7 +457,7 @@ class PipelineTaskConnections(metaclass=PipelineTaskConnectionsMetaclass): A `PipelineTaskConfig` class instance whose class has been configured to use this `PipelineTaskConnections` class. - See also + See Also -------- iterConnections @@ -680,20 +685,21 @@ def __delattr__(self, name): def buildDatasetRefs( self, quantum: Quantum ) -> tuple[InputQuantizedConnection, OutputQuantizedConnection]: - """Builds QuantizedConnections corresponding to input Quantum + """Build `QuantizedConnection` corresponding to input + `~lsst.daf.butler.Quantum`. Parameters ---------- quantum : `lsst.daf.butler.Quantum` Quantum object which defines the inputs and outputs for a given - unit of processing + unit of processing. 
Returns ------- retVal : `tuple` of (`InputQuantizedConnection`, `OutputQuantizedConnection`) Namespaces mapping attribute names (identifiers of connections) to butler references defined in the - input `lsst.daf.butler.Quantum` + input `lsst.daf.butler.Quantum`. """ inputDatasetRefs = InputQuantizedConnection() outputDatasetRefs = OutputQuantizedConnection() @@ -771,14 +777,15 @@ def adjustQuantum( inputs : `dict` Dictionary whose keys are an input (regular or prerequisite) connection name and whose values are a tuple of the connection - instance and a collection of associated `DatasetRef` objects. + instance and a collection of associated + `~lsst.daf.butler.DatasetRef` objects. The exact type of the nested collections is unspecified; it can be assumed to be multi-pass iterable and support `len` and ``in``, but it should not be mutated in place. In contrast, the outer dictionaries are guaranteed to be temporary copies that are true `dict` instances, and hence may be modified and even returned; this is especially useful for delegating to `super` (see notes below). - outputs : `Mapping` + outputs : `~collections.abc.Mapping` Mapping of output datasets, with the same structure as ``inputs``. label : `str` Label for this task in the pipeline (should be used in all @@ -789,14 +796,14 @@ def adjustQuantum( Returns ------- - adjusted_inputs : `Mapping` + adjusted_inputs : `~collections.abc.Mapping` Mapping of the same form as ``inputs`` with updated containers of - input `DatasetRef` objects. Connections that are not changed - should not be returned at all. Datasets may only be removed, not - added. Nested collections may be of any multi-pass iterable type, - and the order of iteration will set the order of iteration within - `PipelineTask.runQuantum`. - adjusted_outputs : `Mapping` + input `~lsst.daf.butler.DatasetRef` objects. Connections that are + not changed should not be returned at all. Datasets may only be + removed, not added. Nested collections may be of any multi-pass + iterable type, and the order of iteration will set the order of + iteration within `PipelineTask.runQuantum`. + adjusted_outputs : `~collections.abc.Mapping` Mapping of updated output datasets, with the same structure and interpretation as ``adjusted_inputs``. @@ -820,7 +827,9 @@ def adjustQuantum( returns an empty mapping (i.e. makes no adjustments). It should always called be via `super` by custom implementations, ideally at the end of the custom implementation with already-adjusted mappings when - any datasets are actually dropped, e.g.:: + any datasets are actually dropped, e.g.: + + .. code-block:: python def adjustQuantum(self, inputs, outputs, label, data_id): # Filter out some dataset refs for one connection. @@ -890,24 +899,24 @@ def adjustQuantum(self, inputs, outputs, label, data_id): def iterConnections( connections: PipelineTaskConnections, connectionType: str | Iterable[str] ) -> Generator[BaseConnection, None, None]: - """Creates an iterator over the selected connections type which yields + """Create an iterator over the selected connections type which yields all the defined connections of that type. Parameters ---------- - connections: `PipelineTaskConnections` + connections : `PipelineTaskConnections` An instance of a `PipelineTaskConnections` object that will be iterated over. - connectionType: `str` + connectionType : `str` The type of connections to iterate over, valid values are inputs, outputs, prerequisiteInputs, initInputs, initOutputs. 
Yields ------ - connection: `BaseConnection` + connection: `~.connectionTypes.BaseConnection` A connection defined on the input connections object of the type supplied. The yielded value Will be an derived type of - `BaseConnection`. + `~.connectionTypes.BaseConnection`. """ if isinstance(connectionType, str): connectionType = (connectionType,) @@ -920,18 +929,18 @@ class AdjustQuantumHelper: """Helper class for calling `PipelineTaskConnections.adjustQuantum`. This class holds `input` and `output` mappings in the form used by - `Quantum` and execution harness code, i.e. with `DatasetType` keys, - translating them to and from the connection-oriented mappings used inside - `PipelineTaskConnections`. + `Quantum` and execution harness code, i.e. with + `~lsst.daf.butler.DatasetType` keys, translating them to and from the + connection-oriented mappings used inside `PipelineTaskConnections`. """ inputs: NamedKeyMapping[DatasetType, list[DatasetRef]] """Mapping of regular input and prerequisite input datasets, grouped by - `DatasetType`. + `~lsst.daf.butler.DatasetType`. """ outputs: NamedKeyMapping[DatasetType, list[DatasetRef]] - """Mapping of output datasets, grouped by `DatasetType`. + """Mapping of output datasets, grouped by `~lsst.daf.butler.DatasetType`. """ inputs_adjusted: bool = False diff --git a/python/lsst/pipe/base/executionButlerBuilder.py b/python/lsst/pipe/base/executionButlerBuilder.py index 048850db5..7fcd292a6 100644 --- a/python/lsst/pipe/base/executionButlerBuilder.py +++ b/python/lsst/pipe/base/executionButlerBuilder.py @@ -274,11 +274,11 @@ def _setupNewButler( ---------- butler : `Butler` The original butler, upon which the execution butler is based. - outputLocation : `ResourcePath` + outputLocation : `~lsst.resources.ResourcePath` Location of the execution butler. dirExists : `bool` Does the ``outputLocation`` exist, and if so, should it be clobbered? - datastoreRoot : `ResourcePath`, optional + datastoreRoot : `~lsst.resources.ResourcePath`, optional Path for the execution butler datastore. If not specified, then the original butler's datastore will be used. @@ -378,12 +378,15 @@ def buildExecutionButler( datastoreRoot: Optional[ResourcePathExpression] = None, transfer: str = "auto", ) -> Butler: - r"""buildExecutionButler is a function that is responsible for exporting - input `QuantumGraphs` into a new minimal `~lsst.daf.butler.Butler` which - only contains datasets specified by the `QuantumGraph`. These datasets are - both those that already exist in the input `~lsst.daf.butler.Butler`, and - those that are expected to be produced during the execution of the - `QuantumGraph`. + r"""Create an execution butler. + + Responsible for exporting + input `QuantumGraph`\s into a new minimal `~lsst.daf.butler.Butler` which + only contains datasets specified by the `QuantumGraph`. + + These datasets are both those that already exist in the input + `~lsst.daf.butler.Butler`, and those that are expected to be produced + during the execution of the `QuantumGraph`. Parameters ---------- @@ -395,9 +398,9 @@ def buildExecutionButler( graph : `QuantumGraph` Graph containing nodes that are to be exported into an execution butler - outputLocation : convertible to `ResourcePath` + outputLocation : convertible to `~lsst.resources.ResourcePath` URI Location at which the execution butler is to be exported. May be - specified as a string or a `ResourcePath` instance. + specified as a string or a `~lsst.resources.ResourcePath` instance. 
run : `str`, optional The run collection that the exported datasets are to be placed in. If None, the default value in registry.defaults will be used. @@ -420,7 +423,7 @@ def buildExecutionButler( `~lsst.daf.butler.Butler` when creating the execution butler. If not supplied the `~lsst.daf.butler.Butler`\ 's `~lsst.daf.butler.Registry` default collections will be used. - datastoreRoot : convertible to `ResourcePath`, Optional + datastoreRoot : convertible to `~lsst.resources.ResourcePath`, Optional Root directory for datastore of execution butler. If `None`, then the original butler's datastore will be used. transfer : `str` @@ -432,15 +435,15 @@ def buildExecutionButler( Returns ------- executionButler : `lsst.daf.butler.Butler` - An instance of the newly created execution butler + An instance of the newly created execution butler. Raises ------ FileExistsError Raised if something exists in the filesystem at the specified output - location and clobber is `False` + location and clobber is `False`. NotADirectoryError - Raised if specified output URI does not correspond to a directory + Raised if specified output URI does not correspond to a directory. """ # Now require that if run is given it must match the graph run. if run and graph.metadata and run != (graph_run := graph.metadata.get("output_run")): diff --git a/python/lsst/pipe/base/formatters/pexConfig.py b/python/lsst/pipe/base/formatters/pexConfig.py index c6890f9e9..8480ae974 100644 --- a/python/lsst/pipe/base/formatters/pexConfig.py +++ b/python/lsst/pipe/base/formatters/pexConfig.py @@ -32,7 +32,8 @@ class PexConfigFormatter(FileFormatter): """Formatter implementation for reading and writing - `lsst.pex.config.Config` instances.""" + `lsst.pex.config.Config` instances. + """ extension = ".py" diff --git a/python/lsst/pipe/base/graph/_implDetails.py b/python/lsst/pipe/base/graph/_implDetails.py index e96a37ac1..6a38cb247 100644 --- a/python/lsst/pipe/base/graph/_implDetails.py +++ b/python/lsst/pipe/base/graph/_implDetails.py @@ -43,7 +43,7 @@ class _DatasetTracker(Generic[_T, _U]): - r"""This is a generic container for tracking keys which are produced or + r"""A generic container for tracking keys which are produced or consumed by some value. In the context of a QuantumGraph, keys may be `~lsst.daf.butler.DatasetRef`\ s and the values would be Quanta that either produce or consume those `~lsst.daf.butler.DatasetRef`\ s. @@ -68,15 +68,15 @@ def addProducer(self, key: _T, value: _U) -> None: Parameters ---------- - key : TypeVar - The type to track - value : TypeVar - The type associated with the production of the key + key : `~typing.TypeVar` + The type to track. + value : `~typing.TypeVar` + The type associated with the production of the key. Raises ------ ValueError - Raised if key is already declared to be produced by another value + Raised if key is already declared to be produced by another value. """ if (existing := self._producers.get(key)) is not None and existing != value: raise ValueError(f"Only one node is allowed to produce {key}, the current producer is {existing}") @@ -85,16 +85,17 @@ def addProducer(self, key: _T, value: _U) -> None: self._itemsDict[value].add(key) def removeProducer(self, key: _T, value: _U) -> None: - """Remove a value (e.g. QuantumNode or TaskDef) from being considered - a producer of the corresponding key. It is not an error to remove a - key that is not in the tracker. + """Remove a value (e.g. `QuantumNode` or `TaskDef`) from being + considered a producer of the corresponding key. 
+ + It is not an error to remove a key that is not in the tracker. Parameters ---------- - key : TypeVar - The type to track - value : TypeVar - The type associated with the production of the key + key : `~typing.TypeVar` + The type to track. + value : `~typing.TypeVar` + The type associated with the production of the key. """ self._producers.pop(key, None) if self._createInverse: @@ -106,26 +107,27 @@ def addConsumer(self, key: _T, value: _U) -> None: Parameters ---------- - key : TypeVar - The type to track - value : TypeVar - The type associated with the consumption of the key + key : `~typing.TypeVar` + The type to track. + value : `~typing.TypeVar` + The type associated with the consumption of the key. """ self._consumers[key].add(value) if self._createInverse: self._itemsDict[value].add(key) def removeConsumer(self, key: _T, value: _U) -> None: - """Remove a value (e.g. QuantumNode or TaskDef) from being considered - a consumer of the corresponding key. It is not an error to remove a - key that is not in the tracker. + """Remove a value (e.g. `QuantumNode` or `TaskDef`) from being + considered a consumer of the corresponding key. + + It is not an error to remove a key that is not in the tracker. Parameters ---------- - key : TypeVar - The type to track - value : TypeVar - The type associated with the consumption of the key + key : `~typing.TypeVar` + The type to track. + value : `~typing.TypeVar` + The type associated with the consumption of the key. """ if (result := self._consumers.get(key)) is not None: result.discard(value) @@ -139,8 +141,8 @@ def getConsumers(self, key: _T) -> Set[_U]: Parameters ---------- - key : TypeVar - The type which has been tracked in the _DatasetTracker + key : `~typing.TypeVar` + The type which has been tracked in the `_DatasetTracker`. """ return self._consumers.get(key, set()) @@ -150,8 +152,8 @@ def getProducer(self, key: _T) -> Optional[_U]: Parameters ---------- - key : TypeVar - The type which has been tracked in the _DatasetTracker + key : `~typing.TypeVar` + The type which has been tracked in the `_DatasetTracker`. """ # This tracker may have had all nodes associated with a key removed # and if there are no refs (empty set) should return None @@ -163,10 +165,9 @@ def getAll(self, key: _T) -> set[_U]: Parameters ---------- - key : TypeVar - The type which has been tracked in the _DatasetTracker + key : `~typing.TypeVar` + The type which has been tracked in the `_DatasetTracker`. """ - return self.getConsumers(key).union(x for x in (self.getProducer(key),) if x is not None) @property @@ -180,9 +181,10 @@ def makeNetworkXGraph(self) -> nx.DiGraph: """Create a NetworkX graph out of all the contained keys, using the relations of producer and consumers to create the edges. - Returns: - graph : networkx.DiGraph - The graph created out of the supplied keys and their relations + Returns + ------- + graph : `networkx.DiGraph` + The graph created out of the supplied keys and their relations. """ graph = nx.DiGraph() for entry in self._producers.keys() | self._consumers.keys(): @@ -212,24 +214,24 @@ def remove(self, key: _T) -> None: Parameters ---------- - key : TypeVar - A key tracked by the DatasetTracker + key : `~typing.TypeVar` + A key tracked by the `_DatasetTracker`. """ self._producers.pop(key, None) self._consumers.pop(key, None) def __contains__(self, key: _T) -> bool: - """Check if a key is in the _DatasetTracker + """Check if a key is in the `_DatasetTracker`. 
Parameters ---------- - key : TypeVar - The key to check + key : `~typing.TypeVar` + The key to check. Returns ------- - contains : bool - Boolean of the presence of the supplied key + contains : `bool` + Boolean of the presence of the supplied key. """ return key in self._producers or key in self._consumers @@ -240,20 +242,22 @@ def _pruner( *, alreadyPruned: Optional[Set[QuantumNode]] = None, ) -> None: - r"""Prune supplied dataset refs out of datasetRefDict container, recursing - to additional nodes dependant on pruned refs. This function modifies - datasetRefDict in-place. + r"""Prune supplied dataset refs out of ``datasetRefDict`` container, + recursing to additional nodes dependant on pruned refs. Parameters ---------- - datasetRefDict : `_DatasetTracker[DatasetRef, QuantumNode]` - The dataset tracker that maps `DatasetRef`\ s to the Quantum Nodes - that produce/consume that `DatasetRef` - refsToRemove : `Iterable` of `DatasetRef` - The `DatasetRef`\ s which should be pruned from the input dataset - tracker + datasetRefDict : `_DatasetTracker` [ `~lsst.daf.butler.DatasetRef`, \ + `QuantumNode`] + The dataset tracker that maps `~lsst.daf.butler.DatasetRef`\ s to the + `QuantumNode`\s that produce/consume that + `~lsst.daf.butler.DatasetRef`. + This function modifies ``datasetRefDict`` in-place. + refsToRemove : `Iterable` of `~lsst.daf.butler.DatasetRef` + The `~lsst.daf.butler.DatasetRef`\ s which should be pruned from the + input dataset tracker. alreadyPruned : `set` of `QuantumNode` - A set of nodes which have been pruned from the dataset tracker + A set of nodes which have been pruned from the dataset tracker. """ if alreadyPruned is None: alreadyPruned = set() diff --git a/python/lsst/pipe/base/graph/_loadHelpers.py b/python/lsst/pipe/base/graph/_loadHelpers.py index db1719a3c..47e3962c0 100644 --- a/python/lsst/pipe/base/graph/_loadHelpers.py +++ b/python/lsst/pipe/base/graph/_loadHelpers.py @@ -40,7 +40,7 @@ @dataclass class LoadHelper(ContextManager["LoadHelper"]): - """This is a helper class to assist with selecting the appropriate loader + """Helper class to assist with selecting the appropriate loader and managing any contexts that may be needed. This helper will raise a `ValueError` if the specified file does not appear @@ -100,15 +100,15 @@ def _validateSave(self, magic: bytes, versionBytes: bytes) -> int: --------- magic : `bytes` The first few bytes of the file, used to verify it is a - QuantumGraph save file + `QuantumGraph` save file. versionBytes : `bytes` The next few bytes from the beginning of the file, used to parse - which version of the QuantumGraph file the save corresponds to + which version of the `QuantumGraph` file the save corresponds to. Returns ------- save_version : `int` - The save version parsed from the supplied bytes + The save version parsed from the supplied bytes. Raises ------ @@ -154,7 +154,7 @@ def load( nodes: Optional[Iterable[Union[UUID, str]]] = None, graphID: Optional[str] = None, ) -> QuantumGraph: - """Loads in the specified nodes from the graph + """Load in the specified nodes from the graph. Load in the `QuantumGraph` containing only the nodes specified in the ``nodes`` parameter from the graph specified at object creation. If @@ -179,7 +179,7 @@ def load( Returns ------- graph : `QuantumGraph` - The loaded `QuantumGraph` object + The loaded `QuantumGraph` object. Raises ------ @@ -188,9 +188,10 @@ def load( `QuantumGraph` or if graphID parameter does not match the graph being loaded. 
RuntimeError - Raised if Supplied DimensionUniverse is not compatible with the - DimensionUniverse saved in the graph - Raised if the method was not called from within a context block + Raised if supplied `~lsst.daf.butler.DimensionUniverse` is not + compatible with the `~lsst.daf.butler.DimensionUniverse` saved in + the graph. Raised if the method was not called from within a + context block. """ if self._resourceHandle is None: raise RuntimeError("Load can only be used within a context manager") @@ -232,12 +233,13 @@ def _readBytes(self, start: int, stop: int) -> bytes: Returns ------- result : `bytes` - The byte range specified from the `ResourceHandle` + The byte range specified from the + `~lsst.resources.ResourceHandleProtocol`. Raises ------ RuntimeError - Raise if the method was not called from within a context block + Raise if the method was not called from within a context block. """ if self._resourceHandle is None: raise RuntimeError("_readBytes must be called from within a context block") diff --git a/python/lsst/pipe/base/graph/_versionDeserializers.py b/python/lsst/pipe/base/graph/_versionDeserializers.py index f03973a4a..8c51aa443 100644 --- a/python/lsst/pipe/base/graph/_versionDeserializers.py +++ b/python/lsst/pipe/base/graph/_versionDeserializers.py @@ -68,8 +68,8 @@ class StructSizeDescriptor: - """This is basically a class level property. It exists to report the size - (number of bytes) of whatever the formatter string is for a deserializer + """Class level property. It exists to report the size + (number of bytes) of whatever the formatter string is for a deserializer. """ def __get__(self, inst: Optional[DeserializerBase], owner: Type[DeserializerBase]) -> int: @@ -94,17 +94,22 @@ def __init_subclass__(cls) -> None: super().__init_subclass__() def unpackHeader(self, rawHeader: bytes) -> Optional[str]: - """Transforms the raw bytes corresponding to the header of a save into - a string of the header information. Returns none if the save format has - no header string implementation (such as save format 1 that is all - pickle) + """Transform the raw bytes corresponding to the header of a save into + a string of the header information. Parameters ---------- rawheader : bytes The bytes that are to be parsed into the header information. These are the bytes after the preamble and structsize number of bytes - and before the headerSize bytes + and before the headerSize bytes. + + Returns + ------- + header : `str` or `None` + Header information as a string. Returns `None` if the save format + has no header string implementation (such as save format 1 that is + all pickle). """ raise NotImplementedError("Base class does not implement this method") @@ -134,7 +139,7 @@ def constructGraph( _readBytes: Callable[[int, int], bytes], universe: Optional[DimensionUniverse] = None, ) -> QuantumGraph: - """Constructs a graph from the deserialized information. + """Construct a graph from the deserialized information. Parameters ---------- @@ -563,8 +568,8 @@ def constructGraph( # Turn the json back into the pydandtic model nodeDeserialized = SerializedQuantumNode.direct(**dump) - # attach the dictionary of dimension records to the pydandtic model - # these are stored seperately because the are stored over and over + # attach the dictionary of dimension records to the pydantic model + # these are stored separately because the are stored over and over # and this saves a lot of space and time. 
nodeDeserialized.quantum.dimensionRecords = self.infoMappings.dimensionRecords # get the label for the current task diff --git a/python/lsst/pipe/base/graph/graph.py b/python/lsst/pipe/base/graph/graph.py index 784cc4f99..5d3fac6d7 100644 --- a/python/lsst/pipe/base/graph/graph.py +++ b/python/lsst/pipe/base/graph/graph.py @@ -180,7 +180,7 @@ def _buildGraphs( globalInitOutputs: Optional[Iterable[DatasetRef]] = None, registryDatasetTypes: Optional[Iterable[DatasetType]] = None, ) -> None: - """Builds the graph that is used to store the relation between tasks, + """Build the graph that is used to store the relation between tasks, and the graph that holds the relations between quanta """ self._metadata = metadata @@ -1024,7 +1024,7 @@ def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> Opt raise ValueError("Only know how to handle files saved as `qgraph`") def buildAndPrintHeader(self) -> None: - """Creates a header that would be used in a save of this object and + """Create a header that would be used in a save of this object and prints it out to standard out. """ _, header = self._buildSaveObject(returnHeader=True) @@ -1295,7 +1295,9 @@ def updateRun(self, run: str, *, metadata_key: str | None = None, update_graph_i dataset_id_map = {} def _update_output_refs_in_place(refs: list[DatasetRef], run: str) -> None: - """Updated list of DatasetRef with new run and dataset IDs.""" + """Update list of `~lsst.daf.butler.DatasetRef` with new run and + dataset IDs. + """ new_refs = [] for ref in refs: new_ref = DatasetRef(ref.datasetType, ref.dataId, run=run, conform=False) @@ -1304,7 +1306,9 @@ def _update_output_refs_in_place(refs: list[DatasetRef], run: str) -> None: refs[:] = new_refs def _update_input_refs_in_place(refs: list[DatasetRef], run: str) -> None: - """Updated list of DatasetRef with IDs from dataset_id_map.""" + """Update list of `~lsst.daf.butler.DatasetRef` with IDs from + dataset_id_map. + """ new_refs = [] for ref in refs: if (new_id := dataset_id_map.get(ref.id)) is not None: @@ -1363,7 +1367,7 @@ def __contains__(self, node: QuantumNode) -> bool: return self._connectedQuanta.has_node(node) def __getstate__(self) -> dict: - """Stores a compact form of the graph as a list of graph nodes, and a + """Store a compact form of the graph as a list of graph nodes, and a tuple of task labels and task configs. The full graph can be reconstructed with this information, and it preserves the ordering of the graph nodes. diff --git a/python/lsst/pipe/base/graph/quantumNode.py b/python/lsst/pipe/base/graph/quantumNode.py index dbc5c30a7..05bf97069 100644 --- a/python/lsst/pipe/base/graph/quantumNode.py +++ b/python/lsst/pipe/base/graph/quantumNode.py @@ -79,10 +79,10 @@ class NodeId: @dataclass(frozen=True) class QuantumNode: - """This class represents a node in the quantum graph. + """Class representing a node in the quantum graph. - The quantum attribute represents the data that is to be processed at this - node. + The ``quantum`` attribute represents the data that is to be processed at + this node. """ quantum: Quantum diff --git a/python/lsst/pipe/base/graphBuilder.py b/python/lsst/pipe/base/graphBuilder.py index 7d2d322e2..d53cb5aec 100644 --- a/python/lsst/pipe/base/graphBuilder.py +++ b/python/lsst/pipe/base/graphBuilder.py @@ -73,11 +73,13 @@ @dataclass class _RefHolder: - """Placeholder for `DatasetRef` representing a future resolved reference. + r"""Placeholder for `~lsst.daf.butler.DatasetRef` representing a future + resolved reference. 
- As we eliminated unresolved DatasetRefs we now use `None` to represent - a reference that is yet to be resolved. Information about its corresponding - dataset type and coordinate is stored in `_DatasetDict` mapping. + As we eliminated unresolved `~lsst.daf.butler.DatasetRef`\s we now use + `None` to represent a reference that is yet to be resolved. Information + about its corresponding dataset type and coordinate is stored in + `_DatasetDict` mapping. """ dataset_type: DatasetType @@ -93,7 +95,8 @@ class _RefHolder: @property def resolved_ref(self) -> DatasetRef: """Access resolved reference, should only be called after the - reference is set (`DatasetRef`).""" + reference is set (`~lsst.daf.butler.DatasetRef`). + """ assert self.ref is not None, "Dataset reference is not set." return self.ref @@ -1373,7 +1376,7 @@ def makeQuantumGraph( the scaffolding data structure. Parameters - --------- + ---------- registry : `lsst.daf.butler.Registry` Registry for the data repository; used for all data ID queries. metadata : Optional Mapping of `str` to primitives diff --git a/python/lsst/pipe/base/pipeTools.py b/python/lsst/pipe/base/pipeTools.py index 4bb572bc1..84df10ae3 100644 --- a/python/lsst/pipe/base/pipeTools.py +++ b/python/lsst/pipe/base/pipeTools.py @@ -74,7 +74,7 @@ class PipelineDataCycleError(Exception): def isPipelineOrdered( pipeline: Union[Pipeline, Iterable[TaskDef]], taskFactory: Optional[TaskFactory] = None ) -> bool: - """Checks whether tasks in pipeline are correctly ordered. + """Check whether tasks in pipeline are correctly ordered. Pipeline is correctly ordered if for any DatasetType produced by a task in a pipeline all its consumer tasks are located after producer. @@ -142,10 +142,9 @@ def orderPipeline(pipeline: List[TaskDef]) -> List[TaskDef]: `DuplicateOutputError` is raised when there is more than one producer for a dataset type. `PipelineDataCycleError` is also raised when pipeline has dependency - cycles. `MissingTaskFactoryError` is raised when TaskFactory is needed but - not provided. + cycles. `MissingTaskFactoryError` is raised when `TaskFactory` is needed + but not provided. """ - # This is a modified version of Kahn's algorithm that preserves order # build mapping of the tasks to their inputs and outputs diff --git a/python/lsst/pipe/base/pipeline.py b/python/lsst/pipe/base/pipeline.py index e5754cd72..207b86f45 100644 --- a/python/lsst/pipe/base/pipeline.py +++ b/python/lsst/pipe/base/pipeline.py @@ -232,7 +232,7 @@ def __hash__(self) -> int: @classmethod def _unreduce(cls, taskName: str, config: PipelineTaskConfig, label: str) -> TaskDef: - """Custom callable for unpickling. + """Unpickle pickle. Custom callable for unpickling. All arguments are forwarded directly to the constructor; this trampoline is only needed because ``__reduce__`` callables can't be @@ -268,7 +268,7 @@ def fromFile(cls, filename: str) -> Pipeline: A path that points to a pipeline defined in yaml format. This filename may also supply additional labels to be used in subsetting the loaded Pipeline. These labels are separated from - the path by a \\#, and may be specified as a comma separated + the path by a ``#``, and may be specified as a comma separated list, or a range denoted as beginning..end. Beginning or end may be empty, in which case the range will be a half open interval. Unlike python iteration bounds, end bounds are *INCLUDED*. 
Note @@ -280,7 +280,7 @@ def fromFile(cls, filename: str) -> Pipeline: ------- pipeline: `Pipeline` The pipeline loaded from specified location with appropriate (if - any) subsetting + any) subsetting. Notes ----- @@ -298,27 +298,28 @@ def from_uri(cls, uri: ResourcePathExpression) -> Pipeline: Parameters ---------- - uri : convertible to `ResourcePath` + uri : convertible to `~lsst.resources.ResourcePath` If a string is supplied this should be a URI path that points to a pipeline defined in yaml format, either as a direct path to the - yaml file, or as a directory containing a "pipeline.yaml" file (the - form used by `write_to_uri` with ``expand=True``). This uri may + yaml file, or as a directory containing a ``pipeline.yaml`` file + the form used by `write_to_uri` with ``expand=True``). This uri may also supply additional labels to be used in subsetting the loaded - Pipeline. These labels are separated from the path by a \\#, and - may be specified as a comma separated list, or a range denoted as - beginning..end. Beginning or end may be empty, in which case the + `Pipeline`. These labels are separated from the path by a ``#``, + and may be specified as a comma separated list, or a range denoted + as beginning..end. Beginning or end may be empty, in which case the range will be a half open interval. Unlike python iteration bounds, end bounds are *INCLUDED*. Note that range based selection is not well defined for pipelines that are not linear in nature, and correct behavior is not guaranteed, or may vary from run to run. - The same specifiers can be used with a `ResourcePath` object, by - being the sole contents in the fragments attribute. + The same specifiers can be used with a + `~lsst.resources.ResourcePath` object, by being the sole contents + in the fragments attribute. Returns ------- pipeline : `Pipeline` The pipeline loaded from specified location with appropriate (if - any) subsetting + any) subsetting. Notes ----- @@ -746,16 +747,16 @@ def write_to_uri(self, uri: ResourcePathExpression) -> None: Parameters ---------- - uri : convertible to `ResourcePath` - URI to write to; may have any scheme with `ResourcePath` write - support or no scheme for a local file/directory. Should have a - ``.yaml``. + uri : convertible to `~lsst.resources.ResourcePath` + URI to write to; may have any scheme with + `~lsst.resources.ResourcePath` write support or no scheme for a + local file/directory. Should have a ``.yaml``. """ self._pipelineIR.write_to_uri(uri) def toExpandedPipeline(self) -> Generator[TaskDef, None, None]: - """Returns a generator of TaskDefs which can be used to create quantum - graphs. + r"""Return a generator of `TaskDef`\s which can be used to create + quantum graphs. Returns ------- @@ -944,7 +945,7 @@ def makeDatasetTypesSet( is_input: bool, freeze: bool = True, ) -> NamedValueSet[DatasetType]: - """Constructs a set of true `DatasetType` objects + """Construct a set of true `DatasetType` objects Parameters ---------- diff --git a/python/lsst/pipe/base/pipelineIR.py b/python/lsst/pipe/base/pipelineIR.py index d6f206f57..6056edb77 100644 --- a/python/lsst/pipe/base/pipelineIR.py +++ b/python/lsst/pipe/base/pipelineIR.py @@ -41,9 +41,10 @@ class _Tags(enum.Enum): class PipelineYamlLoader(yaml.SafeLoader): - """This is a specialized version of yaml's SafeLoader. It checks and raises - an exception if it finds that there are multiple instances of the same key - found inside a pipeline file at a given scope. + """Specialized version of yaml's SafeLoader. 
+ + It checks and raises an exception if it finds that there are multiple + instances of the same key found inside a pipeline file at a given scope. """ def construct_mapping(self, node: yaml.MappingNode, deep: bool = False) -> dict[Hashable, Any]: @@ -89,7 +90,8 @@ class ContractError(Exception): @dataclass class ContractIR: """Intermediate representation of configuration contracts read from a - pipeline yaml file.""" + pipeline yaml file. + """ contract: str """A string of python code representing one or more conditions on configs @@ -191,18 +193,20 @@ class ParametersIR: task configuration blocks to specify configuration values. They may not be used in the special ``file`` or ``python`` blocks. - Example: - paramters: - shared_value: 14 - tasks: - taskA: - class: modA - config: - field1: parameters.shared_value - taskB: - class: modB - config: - field2: parameters.shared_value + Examples + -------- + .. code-block:: yaml + parameters: + shared_value: 14 + tasks: + taskA: + class: modA + config: + field1: parameters.shared_value + taskB: + class: modB + config: + field2: parameters.shared_value """ mapping: MutableMapping[str, str] @@ -269,7 +273,7 @@ def to_primitives(self) -> dict[str, str | dict | list[str]]: return accumulate def formatted(self, parameters: ParametersIR) -> ConfigIR: - """Returns a new ConfigIR object that is formatted according to the + """Return a new ConfigIR object that is formatted according to the specified parameters Parameters @@ -298,7 +302,7 @@ def formatted(self, parameters: ParametersIR) -> ConfigIR: return new_config def maybe_merge(self, other_config: "ConfigIR") -> Generator["ConfigIR", None, None]: - """Merges another instance of a `ConfigIR` into this instance if + """Merge another instance of a `ConfigIR` into this instance if possible. This function returns a generator that is either self if the configs were merged, or self, and other_config if that could not be merged. @@ -376,7 +380,7 @@ def to_primitives(self) -> dict[str, str | list[dict]]: return accumulate def add_or_update_config(self, other_config: ConfigIR) -> None: - """Adds a `ConfigIR` to this task if one is not present. Merges configs + """Add a `ConfigIR` to this task if one is not present. Merges configs if there is a `ConfigIR` present and the dataId keys of both configs match, otherwise adds a new entry to the config list. The exception to the above is that if either the last config or other_config has a @@ -551,7 +555,7 @@ def _read_contracts(self, loaded_yaml: dict[str, Any]) -> None: """Process the contracts portion of the loaded yaml document Parameters - --------- + ---------- loaded_yaml : `dict` A dictionary which matches the structure that would be produced by a yaml reader which parses a pipeline definition document @@ -570,7 +574,7 @@ def _read_parameters(self, loaded_yaml: dict[str, Any]) -> None: """Process the parameters portion of the loaded yaml document Parameters - --------- + ---------- loaded_yaml : `dict` A dictionary which matches the structure that would be produced by a yaml reader which parses a pipeline definition document @@ -597,7 +601,7 @@ def _read_labeled_subsets(self, loaded_yaml: dict[str, Any]) -> None: self.labeled_subsets[key] = LabeledSubset.from_primitives(key, value) def _verify_labeled_subsets(self) -> None: - """Verifies that all the labels in each named subset exist within the + """Verify that all the labels in each named subset exist within the pipeline. 
""" # Verify that all labels defined in a labeled subset are in the @@ -617,7 +621,7 @@ def _read_imports(self, loaded_yaml: dict[str, Any]) -> None: """Process the inherits portion of the loaded yaml document Parameters - --------- + ---------- loaded_yaml : `dict` A dictionary which matches the structure that would be produced by a yaml reader which parses a pipeline definition document @@ -656,9 +660,9 @@ def merge_pipelines(self, pipelines: Iterable[PipelineIR]) -> None: Parameters ---------- - pipelines : `Iterable` of `PipelineIR` objects - An `Iterable` that contains one or more `PipelineIR` objects to - merge into this object. + pipelines : `~collections.abc.Iterable` of `PipelineIR` objects + An `~collections.abc.Iterable` that contains one or more + `PipelineIR` objects to merge into this object. Raises ------ @@ -737,7 +741,7 @@ def _read_tasks(self, loaded_yaml: dict[str, Any]) -> None: """Process the tasks portion of the loaded yaml document Parameters - --------- + ---------- loaded_yaml : `dict` A dictionary which matches the structure that would be produced by a yaml reader which parses a pipeline definition document @@ -817,7 +821,6 @@ def subset_from_labels(self, labelSpecifier: set[str]) -> PipelineIR: members of the subset present in the pipeline will be removed from the resulting pipeline. """ - pipeline = copy.deepcopy(self) # update the label specifier to expand any named subsets @@ -875,7 +878,7 @@ def from_uri(cls, uri: ResourcePathExpression) -> PipelineIR: Parameters ---------- - uri: convertible to `ResourcePath` + uri: convertible to `~lsst.resources.ResourcePath` Location of document to use in creating a `PipelineIR` object. Returns @@ -894,7 +897,7 @@ def write_to_uri(self, uri: ResourcePathExpression) -> None: Parameters ---------- - uri: convertible to `ResourcePath` + uri: convertible to `~lsst.resources.ResourcePath` Location of document to write a `PipelineIR` object. """ with ResourcePath(uri).open("w") as buffer: diff --git a/python/lsst/pipe/base/pipelineTask.py b/python/lsst/pipe/base/pipelineTask.py index 221f4eee6..cfe66904a 100644 --- a/python/lsst/pipe/base/pipelineTask.py +++ b/python/lsst/pipe/base/pipelineTask.py @@ -19,7 +19,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -"""This module defines PipelineTask class and related methods. +"""Define `PipelineTask` class and related methods. """ from __future__ import annotations @@ -155,8 +155,8 @@ def runQuantum( inputRefs: InputQuantizedConnection, outputRefs: OutputQuantizedConnection, ) -> None: - """Method to do butler IO and or transforms to provide in memory - objects for tasks run method + """Do butler IO and transform to provide in memory + objects for tasks `~Task.run` method. Parameters ---------- diff --git a/python/lsst/pipe/base/script/transfer_from_graph.py b/python/lsst/pipe/base/script/transfer_from_graph.py index 567a24f3b..547885ae8 100644 --- a/python/lsst/pipe/base/script/transfer_from_graph.py +++ b/python/lsst/pipe/base/script/transfer_from_graph.py @@ -57,7 +57,6 @@ def transfer_from_graph( count : `int` Actual count of transferred datasets. """ - # Read whole graph into memory qgraph = QuantumGraph.loadUri(graph) diff --git a/python/lsst/pipe/base/task.py b/python/lsst/pipe/base/task.py index d3e68a47c..26aecf3d8 100644 --- a/python/lsst/pipe/base/task.py +++ b/python/lsst/pipe/base/task.py @@ -277,7 +277,7 @@ def getName(self) -> str: taskName : `str` Name of the task. 
- See also + See Also -------- getFullName """ @@ -302,12 +302,12 @@ def makeSubtask(self, name: str, **keyArgs: Any) -> None: ---------- name : `str` Brief name of the subtask. - keyArgs + **keyArgs Extra keyword arguments used to construct the task. The following arguments are automatically provided and cannot be overridden: - - "config". - - "parentTask". + - ``config``. + - ``parentTask``. Notes ----- @@ -343,9 +343,9 @@ def timer(self, name: str, logLevel: int = logging.DEBUG) -> Iterator[None]: with self.timer("someCodeToTime"): pass # code to time - See also + See Also -------- - timer.logInfo + lsst.utils.timer.logInfo """ logInfo(obj=self, prefix=name + "Start", logLevel=logLevel) try: @@ -365,7 +365,7 @@ def makeField(cls, doc: str) -> ConfigurableField: Returns ------- configurableField : `lsst.pex.config.ConfigurableField` - A `~ConfigurableField` for this task. + A `~lsst.pex.config.ConfigurableField` for this task. Examples -------- @@ -416,7 +416,7 @@ def _unpickle_via_factory( return factory(*args, **kwargs) def _reduce_kwargs(self) -> Dict[str, Any]: - """Returns a dict of the keyword arguments that should be used + """Return a dict of the keyword arguments that should be used by `__reduce__`. Subclasses with additional arguments should always call the parent diff --git a/python/lsst/pipe/base/tests/mocks/_data_id_match.py b/python/lsst/pipe/base/tests/mocks/_data_id_match.py index c0ae3b283..4963ed9a3 100644 --- a/python/lsst/pipe/base/tests/mocks/_data_id_match.py +++ b/python/lsst/pipe/base/tests/mocks/_data_id_match.py @@ -34,7 +34,7 @@ class _DataIdMatchTreeVisitor(TreeVisitor): """Expression tree visitor which evaluates expression using values from - DataId. + `~lsst.daf.butler.DataId`. """ def __init__(self, dataId: DataId): @@ -142,11 +142,11 @@ def __init__(self, expression: str): self.tree = parser.parse(expression) def match(self, dataId: DataId) -> bool: - """Matches DataId contents against the expression. + """Match DataId contents against the expression. Parameters ---------- - dataId : `DataId` + dataId : `~lsst.daf.butler.DataId` DataId that is matched against an expression. Returns @@ -158,7 +158,7 @@ def match(self, dataId: DataId) -> bool: ------ KeyError Raised when identifier in expression is not defined for given - `DataId`. + `~lsst.daf.butler.DataId`. TypeError Raised when expression evaluates to a non-boolean type or when operation in expression cannot be performed on operand types. 
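
The `Task.makeSubtask` and `Task.makeField` docstrings touched above describe the
standard subtask pattern.  A minimal sketch of that pattern, using hypothetical
task and config names, might look like this:

    import lsst.pex.config as pexConfig
    from lsst.pipe.base import Task


    class ExampleSubConfig(pexConfig.Config):
        pass


    class ExampleSubTask(Task):
        """Hypothetical subtask used only for this sketch."""

        ConfigClass = ExampleSubConfig
        _DefaultName = "exampleSub"


    class ParentConfig(pexConfig.Config):
        # makeField() wraps the subtask in a ConfigurableField so that the
        # parent configuration can retarget or reconfigure it.
        exampleSub = ExampleSubTask.makeField("An example subtask.")


    class ParentTask(Task):
        """Hypothetical parent task used only for this sketch."""

        ConfigClass = ParentConfig
        _DefaultName = "parent"

        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            # makeSubtask() constructs self.exampleSub from the config field,
            # supplying the ``config`` and ``parentTask`` arguments
            # automatically, as noted in the docstring above.
            self.makeSubtask("exampleSub")
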
diff --git a/python/lsst/pipe/base/tests/mocks/_pipeline_task.py b/python/lsst/pipe/base/tests/mocks/_pipeline_task.py index 6b16c344e..392fda4cd 100644 --- a/python/lsst/pipe/base/tests/mocks/_pipeline_task.py +++ b/python/lsst/pipe/base/tests/mocks/_pipeline_task.py @@ -160,6 +160,8 @@ def __init__(self, *, config: MockPipelineTaskConfig): class MockPipelineTaskConfig(PipelineTaskConfig, pipelineConnections=MockPipelineTaskConnections): + """Configuration class for `MockPipelineTask`.""" + fail_condition = Field[str]( dtype=str, default="", diff --git a/python/lsst/pipe/base/tests/no_dimensions.py b/python/lsst/pipe/base/tests/no_dimensions.py index 796fb5121..d63de4935 100644 --- a/python/lsst/pipe/base/tests/no_dimensions.py +++ b/python/lsst/pipe/base/tests/no_dimensions.py @@ -42,6 +42,8 @@ class NoDimensionsTestConnections(PipelineTaskConnections, dimensions=set()): + """Connections class for `NoDimensionsTestTask`.""" + input = connectionTypes.Input( name="input", doc="some dict-y input data for testing", storageClass="StructuredDataDict" ) @@ -51,6 +53,8 @@ class NoDimensionsTestConnections(PipelineTaskConnections, dimensions=set()): class NoDimensionsTestConfig(PipelineTaskConfig, pipelineConnections=NoDimensionsTestConnections): + """Configuration for `NoDimensionTestTask`.""" + key = Field[str](doc="String key for the dict entry the task sets.", default="one") value = Field[int](doc="Integer value for the dict entry the task sets.", default=1) outputSC = Field[str](doc="Output storage class requested", default="dict") diff --git a/python/lsst/pipe/base/tests/simpleQGraph.py b/python/lsst/pipe/base/tests/simpleQGraph.py index 12fe6a9e1..c835fe011 100644 --- a/python/lsst/pipe/base/tests/simpleQGraph.py +++ b/python/lsst/pipe/base/tests/simpleQGraph.py @@ -204,7 +204,7 @@ def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[ def makeSimplePipeline(nQuanta: int, instrument: Optional[str] = None) -> Pipeline: """Make a simple Pipeline for tests. - This is called by ``makeSimpleQGraph`` if no pipeline is passed to that + This is called by `makeSimpleQGraph()` if no pipeline is passed to that function. It can also be used to customize the pipeline used by ``makeSimpleQGraph`` function by calling this first and passing the result to it. @@ -300,7 +300,6 @@ def populateButler( dataset type names. By default a single dataset of type "add_dataset0" is added to a ``butler.run`` collection. """ - # Add dataset types to registry taskDefs = list(pipeline.toExpandedPipeline()) registerDatasetTypes(butler.registry, taskDefs) @@ -366,7 +365,7 @@ def makeSimpleQGraph( bind: Optional[Mapping[str, Any]] = None, metadata: Optional[MutableMapping[str, Any]] = None, ) -> Tuple[Butler, QuantumGraph]: - """Make simple QuantumGraph for tests. + """Make simple `QuantumGraph` for tests. Makes simple one-task pipeline with AddTask, sets up in-memory registry and butler, fills them with minimal data, and generates QuantumGraph with @@ -414,10 +413,10 @@ def makeSimpleQGraph( `DatasetQueryConstraintVariant.ALL`. makeDatastoreRecords : `bool`, optional If `True` then add datstore records to generated quanta. - bind : `Mapping`, optional + bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``userQuery`` expression, keyed by the identifiers they replace. - metadata : `Mapping`, optional + metadata : `~collections.abc.Mapping`, optional Optional graph metadata. 
Returns @@ -427,7 +426,6 @@ def makeSimpleQGraph( qgraph : `~lsst.pipe.base.QuantumGraph` Quantum graph instance """ - if pipeline is None: pipeline = makeSimplePipeline(nQuanta=nQuanta, instrument=instrument) diff --git a/python/lsst/pipe/base/tests/util.py b/python/lsst/pipe/base/tests/util.py index 1d9919843..832cd91c0 100644 --- a/python/lsst/pipe/base/tests/util.py +++ b/python/lsst/pipe/base/tests/util.py @@ -34,17 +34,17 @@ def check_output_run(graph: QuantumGraph, run: str) -> list[DatasetRef]: Parameters ---------- - graph : `Quantumgraph` + graph : `QuantumGraph` Quantum graph. run : `str` Output run name. Returns - refs : `list` [ `DatasetRef` ] + ------- + refs : `list` [ `~lsst.daf.butler.DatasetRef` ] List of output/intermediate dataset references that do NOT belong to the specified run. """ - # Collect all inputs/outputs, so that we can build intermediate refs. output_refs = [] input_refs = [] From 7febe40d9bce9d9db05f9e1cdc6ef6ae0a6b97ed Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 14:08:17 -0700 Subject: [PATCH 05/10] Add pydocstyle configuration --- pyproject.toml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 808bdea9a..17bbfb9c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -125,3 +125,14 @@ exclude_lines = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] + +[tool.pydocstyle] +convention = "numpy" +# Our coding style does not require docstrings for magic methods (D105) +# Our docstyle documents __init__ at the class level (D107) +# We allow methods to inherit docstrings and this is not compatible with D102. +# Docstring at the very first line is not required +# D200, D205 and D400 all complain if the first sentence of the docstring does +# not fit on one line. 
+# Do not require docstrings in __init__.py files (D104) +add-ignore = ["D107", "D105", "D102", "D100", "D200", "D205", "D400", "D104"] From 71939fa32b4d07b18bede7eda13bfdae872e2bf9 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 14:09:01 -0700 Subject: [PATCH 06/10] Add pydocstyle check --- .github/workflows/docstyle.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/workflows/docstyle.yaml diff --git a/.github/workflows/docstyle.yaml b/.github/workflows/docstyle.yaml new file mode 100644 index 000000000..38fb4423c --- /dev/null +++ b/.github/workflows/docstyle.yaml @@ -0,0 +1,13 @@ +name: Run docstyle + +on: + push: + branches: + - main + pull_request: + +jobs: + call-workflow: + uses: lsst/rubin_workflows/.github/workflows/docstyle.yaml@main + with: + args: "python/" From 27e58eca8f7160f570a5686c9518f685c65cc5da Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 14:30:48 -0700 Subject: [PATCH 07/10] More docstring fixes --- python/lsst/pipe/base/_instrument.py | 18 ++-- python/lsst/pipe/base/_task_metadata.py | 2 +- python/lsst/pipe/base/butlerQuantumContext.py | 10 +-- python/lsst/pipe/base/config.py | 2 +- python/lsst/pipe/base/connectionTypes.py | 22 ++--- .../lsst/pipe/base/executionButlerBuilder.py | 3 +- python/lsst/pipe/base/graph/_implDetails.py | 2 +- python/lsst/pipe/base/graph/_loadHelpers.py | 2 +- python/lsst/pipe/base/graph/graph.py | 2 +- python/lsst/pipe/base/graphBuilder.py | 90 +++++++++++-------- python/lsst/pipe/base/pipeline.py | 27 +++--- python/lsst/pipe/base/taskFactory.py | 3 +- 12 files changed, 102 insertions(+), 81 deletions(-) diff --git a/python/lsst/pipe/base/_instrument.py b/python/lsst/pipe/base/_instrument.py index 7d9664c01..07f4439ea 100644 --- a/python/lsst/pipe/base/_instrument.py +++ b/python/lsst/pipe/base/_instrument.py @@ -46,10 +46,10 @@ class Instrument(metaclass=ABCMeta): Parameters ---------- collection_prefix : `str`, optional - Prefix for collection names to use instead of the intrument's own name. - This is primarily for use in simulated-data repositories, where the - instrument name may not be necessary and/or sufficient to distinguish - between collections. + Prefix for collection names to use instead of the instrument's own + name. This is primarily for use in simulated-data repositories, where + the instrument name may not be necessary and/or sufficient to + distinguish between collections. Notes ----- @@ -142,7 +142,7 @@ def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = N registry : `lsst.daf.butler.Registry` Butler registry to query to find the information. collection_prefix : `str`, optional - Prefix for collection names to use instead of the intrument's own + Prefix for collection names to use instead of the instrument's own name. This is primarily for use in simulated-data repositories, where the instrument name may not be necessary and/or sufficient to distinguish between collections. @@ -199,7 +199,7 @@ def from_string( Butler registry to query to find information about the instrument, by default `None`. collection_prefix : `str`, optional - Prefix for collection names to use instead of the intrument's own + Prefix for collection names to use instead of the instrument's own name. This is primarily for use in simulated-data repositories, where the instrument name may not be necessary and/or sufficient to distinguish between collections. 
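
The `Instrument.fromName` and `Instrument.from_string` docstrings above describe
lookup either by the short instrument name recorded in the repository or by a
fully-qualified class name.  A minimal sketch of both call patterns, in which the
repository path, instrument name, and package are made up, is:

    from lsst.daf.butler import Butler
    from lsst.pipe.base import Instrument

    butler = Butler("/path/to/repo")  # hypothetical repository

    # Short name: a registry is needed so the class name can be looked up
    # from the instrument dimension record.
    instrument = Instrument.from_string("ExampleCam", butler.registry)

    # Fully-qualified class name: no registry is required.
    instrument = Instrument.from_string("example_obs.ExampleCam")
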
@@ -247,10 +247,10 @@ def from_data_id(data_id: DataCoordinate, collection_prefix: Optional[str] = Non Parameters ---------- - data_id : `DataCoordinate` + data_id : `~lsst.daf.butler.DataCoordinate` Expanded data ID that includes the instrument dimension. collection_prefix : `str`, optional - Prefix for collection names to use instead of the intrument's own + Prefix for collection names to use instead of the instrument's own name. This is primarily for use in simulated-data repositories, where the instrument name may not be necessary and/or sufficient to distinguish between collections. @@ -282,7 +282,7 @@ def _from_cls_name(cls_name: str, collection_prefix: str | None = None) -> Instr cls_name : `str` Fully-qualified name of the type. collection_prefix : `str`, optional - Prefix for collection names to use instead of the intrument's own + Prefix for collection names to use instead of the instrument's own name. This is primarily for use in simulated-data repositories, where the instrument name may not be necessary and/or sufficient to distinguish between collections. diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py index a8982b1a0..bf731bc70 100644 --- a/python/lsst/pipe/base/_task_metadata.py +++ b/python/lsst/pipe/base/_task_metadata.py @@ -80,7 +80,7 @@ def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata": Parameters ---------- - d : `Mapping` + d : `~collections.abc.Mapping` Mapping to convert. Can be hierarchical. Any dictionaries in the hierarchy are converted to `TaskMetadata`. diff --git a/python/lsst/pipe/base/butlerQuantumContext.py b/python/lsst/pipe/base/butlerQuantumContext.py index aa50d112e..fa3f16e33 100644 --- a/python/lsst/pipe/base/butlerQuantumContext.py +++ b/python/lsst/pipe/base/butlerQuantumContext.py @@ -133,8 +133,8 @@ def get( Raises ------ ValueError - Raised if a `DatasetRef` is passed to get that is not defined in - the quantum object + Raised if a `~lsst.daf.butler.DatasetRef` is passed to get that is + not defined in the quantum object """ # Set up a periodic logger so log messages can be issued if things # are taking too long. @@ -214,8 +214,8 @@ def put( ``[datasetRef1, datasetRef2]`` then ``values.calexp`` should be ``[calexp1, calexp2]``. Like wise if there is a single ref, then only a single object need be passed. The same restriction applies - if dataset is directly a `list` of `DatasetRef` or a single - `DatasetRef`. + if dataset is directly a `list` of `~lsst.daf.butler.DatasetRef` + or a single `~lsst.daf.butler.DatasetRef`. dataset This argument may either be an `InputQuantizedConnection` which describes all the inputs of a quantum, a list of @@ -269,7 +269,7 @@ def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) ---------- ref : `list` [ `~lsst.daf.butler.DatasetRef` ] or \ `~lsst.daf.butler.DatasetRef` - Either a `list` or a single `DatasetRef` to check + Either a `list` or a single `~lsst.daf.butler.DatasetRef` to check inout : `set` The connection type to check, e.g. either an input or an output. This prevents both types needing to be checked for every operation, diff --git a/python/lsst/pipe/base/config.py b/python/lsst/pipe/base/config.py index cfdc4917d..c64df134d 100644 --- a/python/lsst/pipe/base/config.py +++ b/python/lsst/pipe/base/config.py @@ -227,7 +227,7 @@ def applyConfigOverrides( taskDefaultName : `str` The default name associated with the `Task` class. This may be used with instrumental overrides. 
- pipelineConfigs : `Iterable` of `ConfigIR` + pipelineConfigs : `~collections.abc.Iterable` of `ConfigIR` An iterable of `ConfigIR` objects that contain overrides to apply to this config instance. parameters : `ParametersIR` diff --git a/python/lsst/pipe/base/connectionTypes.py b/python/lsst/pipe/base/connectionTypes.py index 036901117..97b376eea 100644 --- a/python/lsst/pipe/base/connectionTypes.py +++ b/python/lsst/pipe/base/connectionTypes.py @@ -34,14 +34,14 @@ @dataclasses.dataclass(frozen=True) class BaseConnection: - """Base class used for declaring PipelineTask connections + """Base class used for declaring `PipelineTask` connections. Parameters ---------- name : `str` - The name used to identify the dataset type + The name used to identify the dataset type. storageClass : `str` - The storage class used when (un)/persisting the dataset type + The storage class used when (un)/persisting the dataset type. multiple : `bool` Indicates if this connection should expect to contain multiple objects of the given dataset type. Tasks with more than one connection with @@ -49,7 +49,7 @@ class BaseConnection: `PipelineTaskConnections.adjustQuantum` to ensure those datasets are consistent (i.e. zip-iterable) in `PipelineTask.runQuantum()` and notify the execution system as early as possible of outputs that will - not be produced because the corresponding input is missing. + not be produced because the corresponding input is missing. """ name: str @@ -87,7 +87,8 @@ def __get__(self, inst, klass): def makeDatasetType( self, universe: DimensionUniverse, parentStorageClass: StorageClass | str | None = None ) -> DatasetType: - """Construct a true `DatasetType` instance with normalized dimensions. + """Construct a true `~lsst.daf.butler.DatasetType` instance with + normalized dimensions. Parameters ---------- @@ -99,8 +100,8 @@ def makeDatasetType( Returns ------- - datasetType : `DatasetType` - The `DatasetType` defined by this connection. + datasetType : `~lsst.daf.butler.DatasetType` + The `~lsst.daf.butler.DatasetType` defined by this connection. """ return DatasetType( self.name, universe.empty, self.storageClass, parentStorageClass=parentStorageClass @@ -149,7 +150,8 @@ def __post_init__(self): def makeDatasetType( self, universe: DimensionUniverse, parentStorageClass: StorageClass | str | None = None ) -> DatasetType: - """Construct a true `DatasetType` instance with normalized dimensions. + """Construct a true `~lsst.daf.butler.DatasetType` instance with + normalized dimensions. Parameters ---------- @@ -161,8 +163,8 @@ def makeDatasetType( Returns ------- - datasetType : `DatasetType` - The `DatasetType` defined by this connection. + datasetType : `~lsst.daf.butler.DatasetType` + The `~lsst.daf.butler.DatasetType` defined by this connection. """ return DatasetType( self.name, diff --git a/python/lsst/pipe/base/executionButlerBuilder.py b/python/lsst/pipe/base/executionButlerBuilder.py index 7fcd292a6..6a571a864 100644 --- a/python/lsst/pipe/base/executionButlerBuilder.py +++ b/python/lsst/pipe/base/executionButlerBuilder.py @@ -48,7 +48,8 @@ def _validate_dataset_type( ---------- candidate : `lsst.daf.butler.DatasetType` The candidate dataset type. - previous : `dict` [Union[`str`, `DatasetType`], `DatasetType`] + previous : `dict` [Union[`str`, `~lsst.daf.butler.DatasetType``], \ + `~lsst.daf.butler.DatasetType``] Previous dataset types found, indexed by name and also by dataset type. The latter provides a quick way of returning a previously checked dataset type. 
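
The `connectionTypes` hunks above document `makeDatasetType` on connection
objects.  A small sketch of calling it directly on a stand-alone connection,
where the connection name and storage class are invented and the default
dimension universe is assumed, is:

    from lsst.daf.butler import DimensionUniverse
    from lsst.pipe.base import connectionTypes

    # A free-standing Input connection, outside any PipelineTaskConnections
    # class, used only to exercise makeDatasetType().
    connection = connectionTypes.Input(
        name="example_input",
        doc="Hypothetical input used only for this sketch.",
        storageClass="StructuredDataDict",
        dimensions=(),
    )

    dataset_type = connection.makeDatasetType(DimensionUniverse())
    print(dataset_type)
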
diff --git a/python/lsst/pipe/base/graph/_implDetails.py b/python/lsst/pipe/base/graph/_implDetails.py index 6a38cb247..a1e0aa15c 100644 --- a/python/lsst/pipe/base/graph/_implDetails.py +++ b/python/lsst/pipe/base/graph/_implDetails.py @@ -253,7 +253,7 @@ def _pruner( `QuantumNode`\s that produce/consume that `~lsst.daf.butler.DatasetRef`. This function modifies ``datasetRefDict`` in-place. - refsToRemove : `Iterable` of `~lsst.daf.butler.DatasetRef` + refsToRemove : `~collections.abc.Iterable` of `~lsst.daf.butler.DatasetRef` The `~lsst.daf.butler.DatasetRef`\ s which should be pruned from the input dataset tracker. alreadyPruned : `set` of `QuantumNode` diff --git a/python/lsst/pipe/base/graph/_loadHelpers.py b/python/lsst/pipe/base/graph/_loadHelpers.py index 47e3962c0..20ab1752f 100644 --- a/python/lsst/pipe/base/graph/_loadHelpers.py +++ b/python/lsst/pipe/base/graph/_loadHelpers.py @@ -168,7 +168,7 @@ def load( The universe saved with the graph is used, but if one is passed it will be used to validate the compatibility with the loaded graph universe. - nodes : `Iterable` of `UUID` or `str`; or `None` + nodes : `~collections.abc.Iterable` of `UUID` or `str`; or `None` The nodes to load from the graph, loads all if value is None (the default) graphID : `str` or `None` diff --git a/python/lsst/pipe/base/graph/graph.py b/python/lsst/pipe/base/graph/graph.py index 5d3fac6d7..eb9d2bc52 100644 --- a/python/lsst/pipe/base/graph/graph.py +++ b/python/lsst/pipe/base/graph/graph.py @@ -390,7 +390,7 @@ def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T: Parameters ---------- - refs : `Iterable` of `~lsst.daf.butler.DatasetRef` + refs : `~collections.abc.Iterable` of `~lsst.daf.butler.DatasetRef` Refs which should be removed from resulting graph Returns diff --git a/python/lsst/pipe/base/graphBuilder.py b/python/lsst/pipe/base/graphBuilder.py index d53cb5aec..7701a81fe 100644 --- a/python/lsst/pipe/base/graphBuilder.py +++ b/python/lsst/pipe/base/graphBuilder.py @@ -102,14 +102,15 @@ def resolved_ref(self) -> DatasetRef: class _DatasetDict(NamedKeyDict[DatasetType, dict[DataCoordinate, _RefHolder]]): - """A custom dictionary that maps `DatasetType` to a nested dictionary of - the known `DatasetRef` instances of that type. + """A custom dictionary that maps `~lsst.daf.butler.DatasetType` to a nested + dictionary of the known `~lsst.daf.butler.DatasetRef` instances of that + type. Parameters ---------- args Positional arguments are forwarded to the `dict` constructor. - universe : `DimensionUniverse` + universe : `~lsst.daf.butler.DimensionUniverse` Universe of all possible dimensions. """ @@ -121,14 +122,16 @@ def __init__(self, *args: Any, universe: DimensionUniverse): def fromDatasetTypes( cls, datasetTypes: Iterable[DatasetType], *, universe: DimensionUniverse ) -> _DatasetDict: - """Construct a dictionary from a flat iterable of `DatasetType` keys. + """Construct a dictionary from a flat iterable of + `~lsst.daf.butler.DatasetType` keys. Parameters ---------- - datasetTypes : `iterable` of `DatasetType` + datasetTypes : `~collections.abc.Iterable` of \ + `~lsst.daf.butler.DatasetType` DatasetTypes to use as keys for the dict. Values will be empty dictionaries. - universe : `DimensionUniverse` + universe : `~lsst.daf.butler.DimensionUniverse` Universe of all possible dimensions. 
Returns @@ -150,7 +153,8 @@ def fromSubset( Parameters ---------- - datasetTypes : `iterable` of `DatasetType` + datasetTypes : `~collections.abc.Iterable` of \ + `~lsst.daf.butler.DatasetType` DatasetTypes to use as keys for the dict. Values will be obtained by lookups against ``first`` and ``rest``. first : `_DatasetDict` @@ -245,8 +249,9 @@ def dimensions(self) -> DimensionGraph: return base.union(*[datasetType.dimensions for datasetType in self.keys()]) def unpackSingleRefs(self, storage_classes: dict[str, str]) -> NamedKeyDict[DatasetType, DatasetRef]: - """Unpack nested single-element `DatasetRef` dicts into a new - mapping with `DatasetType` keys and `DatasetRef` values. + """Unpack nested single-element `~lsst.daf.butler.DatasetRef` dicts + into a new mapping with `~lsst.daf.butler.DatasetType` keys and + `~lsst.daf.butler.DatasetRef` values. This method assumes that each nest contains exactly one item, as is the case for all "init" datasets. @@ -262,16 +267,19 @@ def unpackSingleRefs(self, storage_classes: dict[str, str]) -> NamedKeyDict[Data Returns ------- dictionary : `NamedKeyDict` - Dictionary mapping `DatasetType` to `DatasetRef`, with both - `DatasetType` instances and string names usable as keys. + Dictionary mapping `~lsst.daf.butler.DatasetType` to + `~lsst.daf.butler.DatasetRef`, with both + `~lsst.daf.butler.DatasetType` instances and string names usable + as keys. """ return NamedKeyDict( {datasetType: refs[0] for datasetType, refs in self.unpackMultiRefs(storage_classes).items()} ) def unpackMultiRefs(self, storage_classes: dict[str, str]) -> NamedKeyDict[DatasetType, list[DatasetRef]]: - """Unpack nested multi-element `DatasetRef` dicts into a new - mapping with `DatasetType` keys and `list` of `DatasetRef` values. + """Unpack nested multi-element `~lsst.daf.butler.DatasetRef` dicts into + a new mapping with `~lsst.daf.butler.DatasetType` keys and `list` of + `~lsst.daf.butler.DatasetRef` values. Parameters ---------- @@ -284,8 +292,10 @@ def unpackMultiRefs(self, storage_classes: dict[str, str]) -> NamedKeyDict[Datas Returns ------- dictionary : `NamedKeyDict` - Dictionary mapping `DatasetType` to `list` of `DatasetRef`, with - both `DatasetType` instances and string names usable as keys. + Dictionary mapping `~lsst.daf.butler.DatasetType` to `list` of + `~lsst.daf.butler.DatasetRef`, with both + `~lsst.daf.butler.DatasetType` instances and string names usable + as keys. """ result = {} for dataset_type, holders in self.items(): @@ -302,19 +312,20 @@ def unpackMultiRefs(self, storage_classes: dict[str, str]) -> NamedKeyDict[Datas def extract( self, datasetType: DatasetType, dataIds: Iterable[DataCoordinate] ) -> Iterator[tuple[DataCoordinate, DatasetRef | None]]: - """Iterate over the contained `DatasetRef` instances that match the - given `DatasetType` and data IDs. + """Iterate over the contained `~lsst.daf.butler.DatasetRef` instances + that match the given `~lsst.daf.butler.DatasetType` and data IDs. Parameters ---------- - datasetType : `DatasetType` + datasetType : `~lsst.daf.butler.DatasetType` Dataset type to match. - dataIds : `Iterable` [ `DataCoordinate` ] + dataIds : `~collections.abc.Iterable` \ + [ `~lsst.daf.butler.DataCoordinate` ] Data IDs to match. Returns ------- - refs : `Iterator` [ `DatasetRef` ] + refs : `~collections.abc.Iterator` [ `~lsst.daf.butler.DatasetRef` ] DatasetRef instances for which ``ref.datasetType == datasetType`` and ``ref.dataId`` is in ``dataIds``. 
""" @@ -356,7 +367,7 @@ class _QuantumScaffolding: task : _TaskScaffolding Back-reference to the helper object for the `PipelineTask` this quantum represents an execution of. - dataId : `DataCoordinate` + dataId : `~lsst.daf.butler.DataCoordinate` Data ID for this quantum. """ @@ -384,21 +395,23 @@ def __repr__(self) -> str: """ inputs: _DatasetDict - """Nested dictionary containing `DatasetRef` inputs to this quantum. + """Nested dictionary containing `~lsst.daf.butler.DatasetRef` inputs to + this quantum. - This is initialized to map each `DatasetType` to an empty dictionary at - construction. Those nested dictionaries are populated (with data IDs as - keys) with unresolved `DatasetRef` instances in - `_PipelineScaffolding.connectDataIds`. + This is initialized to map each `~lsst.daf.butler.DatasetType` to an empty + dictionary at construction. Those nested dictionaries are populated + (with data IDs as keys) with unresolved `~lsst.daf.butler.DatasetRef` + instances in `_PipelineScaffolding.connectDataIds`. """ outputs: _DatasetDict - """Nested dictionary containing `DatasetRef` outputs this quantum. + """Nested dictionary containing `~lsst.daf.butler.DatasetRef` outputs this + quantum. """ prerequisites: _DatasetDict - """Nested dictionary containing `DatasetRef` prerequisite inputs to this - quantum. + """Nested dictionary containing `~lsst.daf.butler.DatasetRef` prerequisite + inputs to this quantum. """ def makeQuantum(self, datastore_records: Optional[Mapping[str, DatastoreRecordData]] = None) -> Quantum: @@ -406,7 +419,8 @@ def makeQuantum(self, datastore_records: Optional[Mapping[str, DatastoreRecordDa Parameters ---------- - datastore_records : `dict` [ `str`, `DatastoreRecordData` ], optional + datastore_records : `dict` [ `str`, \ + `~lsst.daf.butler.DatastoreRecordData` ], optional If not `None` then fill datastore records in each generated Quantum using the records from this structure. @@ -645,10 +659,10 @@ class _PipelineScaffolding: Parameters ---------- - pipeline : `Pipeline` or `Iterable` [ `TaskDef` ] + pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ] Sequence of tasks from which a graph is to be constructed. Must have nested task classes already imported. - universe : `DimensionUniverse` + universe : `~lsst.daf.butler.DimensionUniverse` Universe of all possible dimensions. Notes @@ -821,7 +835,7 @@ def connectDataIds( datasets. See :ref:`daf_butler_ordered_collection_searches`. userQuery : `str` or `None` User-provided expression to limit the data IDs processed. - externalDataId : `DataCoordinate` + externalDataId : `~lsst.daf.butler.DataCoordinate` Externally-provided data ID that should be used to restrict the results, just as if these constraints had been included via ``AND`` in ``userQuery``. This includes (at least) any instrument named @@ -830,7 +844,7 @@ def connectDataIds( The query constraint variant that should be used to constraint the query based on dataset existance, defaults to `DatasetQueryConstraintVariant.ALL`. - bind : `Mapping`, optional + bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``userQuery`` expression, keyed by the identifiers they replace. @@ -1383,7 +1397,7 @@ def makeQuantumGraph( This is an optional parameter of extra data to carry with the graph. Entries in this mapping should be able to be serialized in JSON. 
- datastore : `Datastore`, optional + datastore : `~lsst.daf.butler.Datastore`, optional If not `None` then fill datastore records in each generated Quantum. @@ -1533,7 +1547,7 @@ class GraphBuilder: If `True` (default), allow quanta to created even if partial outputs exist; this requires the same behavior behavior to be enabled when executing. - datastore : `Datastore`, optional + datastore : `~lsst.daf.butler.Datastore`, optional If not `None` then fill datastore records in each generated Quantum. """ @@ -1565,7 +1579,7 @@ def makeGraph( Parameters ---------- - pipeline : `Pipeline` or `Iterable` [ `TaskDef` ] + pipeline : `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ] Pipeline definition, task names/classes and their configs. collections Expressions representing the collections to search for input @@ -1585,7 +1599,7 @@ def makeGraph( This is an optional parameter of extra data to carry with the graph. Entries in this mapping should be able to be serialized in JSON. - bind : `Mapping`, optional + bind : `~collections.abc.Mapping`, optional Mapping containing literal values that should be injected into the ``userQuery`` expression, keyed by the identifiers they replace. dataId : `lsst.daf.butler.DataCoordinate`, optional diff --git a/python/lsst/pipe/base/pipeline.py b/python/lsst/pipe/base/pipeline.py index 207b86f45..bfe54c1f8 100644 --- a/python/lsst/pipe/base/pipeline.py +++ b/python/lsst/pipe/base/pipeline.py @@ -912,17 +912,19 @@ def fromTaskDef( taskDef: `TaskDef` An instance of a `TaskDef` class for a particular `PipelineTask`. registry: `Registry` - Registry used to construct normalized `DatasetType` objects and - retrieve those that are incomplete. + Registry used to construct normalized + `~lsst.daf.butler.DatasetType` objects and retrieve those that are + incomplete. include_configs : `bool`, optional If `True` (default) include config dataset types as ``initOutputs``. - storage_class_mapping : `Mapping` of `str` to `StorageClass`, optional + storage_class_mapping : `~collections.abc.Mapping` of `str` to \ + `StorageClass`, optional If a taskdef contains a component dataset type that is unknown - to the registry, its parent StorageClass will be looked up in this - mapping if it is supplied. If the mapping does not contain the - composite dataset type, or the mapping is not supplied an exception - will be raised. + to the registry, its parent `~lsst.daf.butler.StorageClass` will + be looked up in this mapping if it is supplied. If the mapping does + not contain the composite dataset type, or the mapping is not + supplied an exception will be raised. Returns ------- @@ -945,7 +947,7 @@ def makeDatasetTypesSet( is_input: bool, freeze: bool = True, ) -> NamedValueSet[DatasetType]: - """Construct a set of true `DatasetType` objects + """Construct a set of true `~lsst.daf.butler.DatasetType` objects Parameters ---------- @@ -1222,11 +1224,12 @@ def fromPipeline( Parameters ---------- - pipeline: `Pipeline` or `Iterable` [ `TaskDef` ] + pipeline: `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ] A collection of tasks that can be run together. registry: `Registry` - Registry used to construct normalized `DatasetType` objects and - retrieve those that are incomplete. + Registry used to construct normalized + `~lsst.daf.butler.DatasetType` objects and retrieve those that are + incomplete. include_configs : `bool`, optional If `True` (default) include config dataset types as ``initOutputs``. 
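
The `GraphBuilder` docstrings above outline quantum-graph construction from a
pipeline, input collections, and a user query.  A rough sketch of that flow is
given below; the repository path, pipeline file, collection names, run name, and
query are all hypothetical, and the ``run``/``userQuery`` keyword names are
assumed from the surrounding API rather than shown in this patch.

    from lsst.daf.butler import Butler
    from lsst.pipe.base import GraphBuilder, Pipeline

    butler = Butler("/path/to/repo", writeable=False)  # hypothetical repository
    pipeline = Pipeline.from_uri("example_pipeline.yaml")  # hypothetical file

    builder = GraphBuilder(butler.registry)
    qgraph = builder.makeGraph(
        pipeline,
        collections=["example/defaults"],       # hypothetical input collections
        run="u/example/run",                    # hypothetical output run
        userQuery="instrument = 'ExampleCam'",  # hypothetical constraint
    )
    print(f"{len(qgraph)} quanta")
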
@@ -1366,7 +1369,7 @@ def initOutputNames( Parameters ---------- - pipeline: `Pipeline` or `Iterable` [ `TaskDef` ] + pipeline: `Pipeline` or `~collections.abc.Iterable` [ `TaskDef` ] A `Pipeline` instance or collection of `TaskDef` instances. include_configs : `bool`, optional If `True` (default) include config dataset types. diff --git a/python/lsst/pipe/base/taskFactory.py b/python/lsst/pipe/base/taskFactory.py index 73bc695d9..f2ccbe765 100644 --- a/python/lsst/pipe/base/taskFactory.py +++ b/python/lsst/pipe/base/taskFactory.py @@ -56,7 +56,8 @@ def makeTask( Task definition structure. butler : `lsst.daf.butler.LimitedButler` Butler instance used to obtain initialization inputs for task. - initInputRefs : `Iterable` of `~lsst.daf.butler.DatasetRef` or `None` + initInputRefs : `~collections.abc.Iterable` of \ + `~lsst.daf.butler.DatasetRef` or `None` List of resolved dataset references for init inputs for this task. Returns From b429cee3b97f72651a30720257b1143b7925774e Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 15:49:18 -0700 Subject: [PATCH 08/10] Use modern type annotations --- .../pipe/base/_datasetQueryConstraints.py | 9 +- python/lsst/pipe/base/_dataset_handle.py | 10 +- python/lsst/pipe/base/_instrument.py | 17 ++- python/lsst/pipe/base/_task_metadata.py | 36 ++--- python/lsst/pipe/base/butlerQuantumContext.py | 25 ++-- python/lsst/pipe/base/config.py | 12 +- .../lsst/pipe/base/executionButlerBuilder.py | 32 ++-- python/lsst/pipe/base/formatters/pexConfig.py | 4 +- python/lsst/pipe/base/graph/_implDetails.py | 21 +-- python/lsst/pipe/base/graph/_loadHelpers.py | 27 ++-- .../pipe/base/graph/_versionDeserializers.py | 55 +++---- python/lsst/pipe/base/graph/graph.py | 140 ++++++++---------- python/lsst/pipe/base/graph/quantumNode.py | 10 +- python/lsst/pipe/base/graphBuilder.py | 28 ++-- python/lsst/pipe/base/pipeTools.py | 9 +- python/lsst/pipe/base/pipeline.py | 61 +++----- python/lsst/pipe/base/pipelineTask.py | 10 +- .../pipe/base/script/register_instrument.py | 4 +- python/lsst/pipe/base/struct.py | 4 +- python/lsst/pipe/base/task.py | 43 ++---- python/lsst/pipe/base/testUtils.py | 23 ++- python/lsst/pipe/base/tests/no_dimensions.py | 4 +- python/lsst/pipe/base/tests/simpleQGraph.py | 32 ++-- 23 files changed, 281 insertions(+), 335 deletions(-) diff --git a/python/lsst/pipe/base/_datasetQueryConstraints.py b/python/lsst/pipe/base/_datasetQueryConstraints.py index 64c910793..a3facf77d 100644 --- a/python/lsst/pipe/base/_datasetQueryConstraints.py +++ b/python/lsst/pipe/base/_datasetQueryConstraints.py @@ -27,7 +27,8 @@ __all__ = ("DatasetQueryConstraintVariant",) import warnings -from typing import Iterable, Iterator, Protocol, Type +from collections.abc import Iterable, Iterator +from typing import Protocol class DatasetQueryConstraintVariant(Iterable, Protocol): @@ -54,9 +55,9 @@ class DatasetQueryConstraintVariant(Iterable, Protocol): `fromExpression` class method given a valid string. 
""" - ALL: "Type[_ALL]" - OFF: "Type[_OFF]" - LIST: "Type[_LIST]" + ALL: "type[_ALL]" + OFF: "type[_OFF]" + LIST: "type[_LIST]" @classmethod def __subclasshook__(cls, subclass): diff --git a/python/lsst/pipe/base/_dataset_handle.py b/python/lsst/pipe/base/_dataset_handle.py index db230a463..303d813dc 100644 --- a/python/lsst/pipe/base/_dataset_handle.py +++ b/python/lsst/pipe/base/_dataset_handle.py @@ -23,7 +23,7 @@ __all__ = ["InMemoryDatasetHandle"] import dataclasses -from typing import Any, Optional, cast +from typing import Any, cast from frozendict import frozendict from lsst.daf.butler import ( @@ -85,8 +85,8 @@ def __init__( def get( self, *, - component: Optional[str] = None, - parameters: Optional[dict] = None, + component: str | None = None, + parameters: dict | None = None, storageClass: str | StorageClass | None = None, ) -> Any: """Retrieve the dataset pointed to by this handle. @@ -244,14 +244,14 @@ def _getStorageClass(self) -> StorageClass: handle. """ - storageClass: Optional[str] = None + storageClass: str | None = None """The name of the `~lsst.daf.butler.StorageClass` associated with this dataset. If `None`, the storage class will be looked up from the factory. """ - parameters: Optional[dict] = None + parameters: dict | None = None """Optional parameters that may be used to specify a subset of the dataset to be loaded (`dict` or `None`). """ diff --git a/python/lsst/pipe/base/_instrument.py b/python/lsst/pipe/base/_instrument.py index 07f4439ea..fd8d5509d 100644 --- a/python/lsst/pipe/base/_instrument.py +++ b/python/lsst/pipe/base/_instrument.py @@ -26,7 +26,8 @@ import datetime import os.path from abc import ABCMeta, abstractmethod -from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union, cast, final +from collections.abc import Sequence +from typing import TYPE_CHECKING, Any, cast, final from lsst.daf.butler import DataCoordinate, DataId, DimensionPacker, DimensionRecord, Formatter from lsst.daf.butler.registry import DataIdError @@ -64,7 +65,7 @@ class Instrument(metaclass=ABCMeta): each of the Tasks that requires special configuration. """ - policyName: Optional[str] = None + policyName: str | None = None """Instrument specific name to use when locating a policy or configuration file in the file system.""" @@ -73,7 +74,7 @@ class Instrument(metaclass=ABCMeta): of the dataset type name, a tuple of dimension names, and the storage class name. If `None` the ingest system will use its default definition.""" - def __init__(self, collection_prefix: Optional[str] = None): + def __init__(self, collection_prefix: str | None = None): if collection_prefix is None: collection_prefix = self.getName() self.collection_prefix = collection_prefix @@ -131,7 +132,7 @@ def register(self, registry: Registry, *, update: bool = False) -> None: raise NotImplementedError() @staticmethod - def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = None) -> Instrument: + def fromName(name: str, registry: Registry, collection_prefix: str | None = None) -> Instrument: """Given an instrument name and a butler registry, retrieve a corresponding instantiated instrument object. @@ -182,7 +183,7 @@ def fromName(name: str, registry: Registry, collection_prefix: Optional[str] = N @staticmethod def from_string( - name: str, registry: Optional[Registry] = None, collection_prefix: Optional[str] = None + name: str, registry: Registry | None = None, collection_prefix: str | None = None ) -> Instrument: """Return an instance from the short name or class name. 
@@ -242,7 +243,7 @@ def from_string( return instr @staticmethod - def from_data_id(data_id: DataCoordinate, collection_prefix: Optional[str] = None) -> Instrument: + def from_data_id(data_id: DataCoordinate, collection_prefix: str | None = None) -> Instrument: """Instantiate an `Instrument` object from a fully-expanded data ID. Parameters @@ -331,7 +332,7 @@ def importAll(registry: Registry) -> None: pass @abstractmethod - def getRawFormatter(self, dataId: DataId) -> Type[Formatter]: + def getRawFormatter(self, dataId: DataId) -> type[Formatter]: """Return the Formatter class that should be used to read a particular raw file. @@ -365,7 +366,7 @@ def applyConfigOverrides(self, name: str, config: Config) -> None: config.load(path) @staticmethod - def formatCollectionTimestamp(timestamp: Union[str, datetime.datetime]) -> str: + def formatCollectionTimestamp(timestamp: str | datetime.datetime) -> str: """Format a timestamp for use in a collection name. Parameters diff --git a/python/lsst/pipe/base/_task_metadata.py b/python/lsst/pipe/base/_task_metadata.py index bf731bc70..67af6a4df 100644 --- a/python/lsst/pipe/base/_task_metadata.py +++ b/python/lsst/pipe/base/_task_metadata.py @@ -24,8 +24,8 @@ import itertools import numbers import warnings -from collections.abc import Sequence -from typing import Any, Collection, Dict, Iterator, List, Mapping, Optional, Protocol, Set, Tuple, Union +from collections.abc import Collection, Iterator, Mapping, Sequence +from typing import Any, Protocol from pydantic import BaseModel, Field, StrictBool, StrictFloat, StrictInt, StrictStr @@ -68,11 +68,11 @@ class TaskMetadata(BaseModel): the value into sub-dictionary. Arbitrary hierarchies are supported. """ - scalars: Dict[str, Union[StrictFloat, StrictInt, StrictBool, StrictStr]] = Field(default_factory=dict) - arrays: Dict[str, Union[List[StrictFloat], List[StrictInt], List[StrictBool], List[StrictStr]]] = Field( + scalars: dict[str, StrictFloat | StrictInt | StrictBool | StrictStr] = Field(default_factory=dict) + arrays: dict[str, list[StrictFloat] | list[StrictInt] | list[StrictBool] | list[StrictStr]] = Field( default_factory=dict ) - metadata: Dict[str, "TaskMetadata"] = Field(default_factory=dict) + metadata: dict[str, "TaskMetadata"] = Field(default_factory=dict) @classmethod def from_dict(cls, d: Mapping[str, Any]) -> "TaskMetadata": @@ -129,7 +129,7 @@ class method. metadata[key] = value return metadata - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """Convert the class to a simple dictionary. Returns @@ -145,7 +145,7 @@ def to_dict(self) -> Dict[str, Any]: used when a simple dictionary is needed. Use `TaskMetadata.from_dict()` to convert it back. """ - d: Dict[str, Any] = {} + d: dict[str, Any] = {} d.update(self.scalars) d.update(self.arrays) for k, v in self.metadata.items(): @@ -197,7 +197,7 @@ def add(self, name: str, value: Any) -> None: self.metadata[key0].add(".".join(keys), value) - def getScalar(self, key: str) -> Union[str, int, float, bool]: + def getScalar(self, key: str) -> str | int | float | bool: """Retrieve a scalar item even if the item is a list. Parameters @@ -220,7 +220,7 @@ def getScalar(self, key: str) -> Union[str, int, float, bool]: # getScalar() is the default behavior for __getitem__. return self[key] - def getArray(self, key: str) -> List[Any]: + def getArray(self, key: str) -> list[Any]: """Retrieve an item as a list even if it is a scalar. Parameters @@ -255,7 +255,7 @@ def getArray(self, key: str) -> List[Any]: # Report the correct key. 
raise KeyError(f"'{key}' not found") from None - def names(self, topLevelOnly: bool = True) -> Set[str]: + def names(self, topLevelOnly: bool = True) -> set[str]: """Return the hierarchical keys from the metadata. Parameters @@ -291,7 +291,7 @@ def names(self, topLevelOnly: bool = True) -> Set[str]: names.update({k + "." + item for item in v.names(topLevelOnly=topLevelOnly)}) return names - def paramNames(self, topLevelOnly: bool) -> Set[str]: + def paramNames(self, topLevelOnly: bool) -> set[str]: """Return hierarchical names. Parameters @@ -319,7 +319,7 @@ def paramNames(self, topLevelOnly: bool) -> Set[str]: return paramNames @staticmethod - def _getKeys(key: str) -> List[str]: + def _getKeys(key: str) -> list[str]: """Return the key hierarchy. Parameters @@ -343,11 +343,11 @@ def _getKeys(key: str) -> List[str]: raise KeyError(f"Invalid key '{key}': only string keys are allowed") from None return keys - def keys(self) -> Tuple[str, ...]: + def keys(self) -> tuple[str, ...]: """Return the top-level keys.""" return tuple(k for k in self) - def items(self) -> Iterator[Tuple[str, Any]]: + def items(self) -> Iterator[tuple[str, Any]]: """Yield the top-level keys and values.""" for k, v in itertools.chain(self.scalars.items(), self.arrays.items(), self.metadata.items()): yield (k, v) @@ -432,12 +432,12 @@ def __setitem__(self, key: str, item: Any) -> None: keys = self._getKeys(key) key0 = keys.pop(0) if len(keys) == 0: - slots: Dict[str, Dict[str, Any]] = { + slots: dict[str, dict[str, Any]] = { "array": self.arrays, "scalar": self.scalars, "metadata": self.metadata, } - primary: Optional[Dict[str, Any]] = None + primary: dict[str, Any] | None = None slot_type, item = self._validate_value(item) primary = slots.pop(slot_type, None) if primary is None: @@ -484,7 +484,7 @@ def __delitem__(self, key: str) -> None: # MyPy can't figure out that this way to combine the types in the # tuple is the one that matters, and annotating a local variable # helps it out. - properties: Tuple[Dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata) + properties: tuple[dict[str, Any], ...] = (self.scalars, self.arrays, self.metadata) for property in properties: if key0 in property: del property[key0] @@ -497,7 +497,7 @@ def __delitem__(self, key: str) -> None: # Report the correct key. raise KeyError(f"'{key}' not found'") from None - def _validate_value(self, value: Any) -> Tuple[str, Any]: + def _validate_value(self, value: Any) -> tuple[str, Any]: """Validate the given value. Parameters diff --git a/python/lsst/pipe/base/butlerQuantumContext.py b/python/lsst/pipe/base/butlerQuantumContext.py index fa3f16e33..88a69b6ac 100644 --- a/python/lsst/pipe/base/butlerQuantumContext.py +++ b/python/lsst/pipe/base/butlerQuantumContext.py @@ -26,7 +26,8 @@ __all__ = ("ButlerQuantumContext",) -from typing import Any, List, Optional, Sequence, Union +from collections.abc import Sequence +from typing import Any from lsst.daf.butler import DatasetRef, DimensionUniverse, LimitedButler, Quantum from lsst.utils.introspection import get_full_type_name @@ -74,7 +75,7 @@ def __init__(self, butler: LimitedButler, quantum: Quantum): self.allOutputs.add((ref.datasetType, ref.dataId)) self.__butler = butler - def _get(self, ref: Optional[Union[DeferredDatasetRef, DatasetRef]]) -> Any: + def _get(self, ref: DeferredDatasetRef | DatasetRef | None) -> Any: # Butler methods below will check for unresolved DatasetRefs and # raise appropriately, so no need for us to do that here. 
if isinstance(ref, DeferredDatasetRef): @@ -93,14 +94,12 @@ def _put(self, value: Any, ref: DatasetRef) -> None: def get( self, - dataset: Union[ - InputQuantizedConnection, - List[Optional[DatasetRef]], - List[Optional[DeferredDatasetRef]], - DatasetRef, - DeferredDatasetRef, - None, - ], + dataset: InputQuantizedConnection + | list[DatasetRef | None] + | list[DeferredDatasetRef | None] + | DatasetRef + | DeferredDatasetRef + | None, ) -> Any: """Fetch data from the butler @@ -197,8 +196,8 @@ def get( def put( self, - values: Union[Struct, List[Any], Any], - dataset: Union[OutputQuantizedConnection, List[DatasetRef], DatasetRef], + values: Struct | list[Any] | Any, + dataset: OutputQuantizedConnection | list[DatasetRef] | DatasetRef, ) -> None: """Put data into the butler. @@ -257,7 +256,7 @@ def put( else: raise TypeError("Dataset argument is not a type that can be used to put") - def _checkMembership(self, ref: Union[List[DatasetRef], DatasetRef], inout: set) -> None: + def _checkMembership(self, ref: list[DatasetRef] | DatasetRef, inout: set) -> None: """Check if a `~lsst.daf.butler.DatasetRef` is part of the input `~lsst.daf.butler.Quantum`. diff --git a/python/lsst/pipe/base/config.py b/python/lsst/pipe/base/config.py index c64df134d..3f773bf7d 100644 --- a/python/lsst/pipe/base/config.py +++ b/python/lsst/pipe/base/config.py @@ -32,7 +32,7 @@ import os from collections.abc import Iterable from numbers import Number -from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar # ----------------------------- # Imports for other modules -- @@ -86,7 +86,7 @@ def __set__( self, instance: pexConfig.Config, value: Any, - at: Optional[StackFrame] = None, + at: StackFrame | None = None, label: str = "assignment", ) -> None: # validate first, even though validate will be called in super @@ -113,10 +113,10 @@ class PipelineTaskConfigMeta(pexConfig.ConfigMeta): """ def __new__( - cls: Type[_S], + cls: type[_S], name: str, - bases: Tuple[type[PipelineTaskConfig], ...], - dct: Dict[str, Any], + bases: tuple[type[PipelineTaskConfig], ...], + dct: dict[str, Any], **kwargs: Any, ) -> _S: if name != "PipelineTaskConfig": @@ -165,7 +165,7 @@ def __new__( return inst def __init__( - self, name: str, bases: Tuple[Type[PipelineTaskConfig], ...], dct: Dict[str, Any], **kwargs: Any + self, name: str, bases: tuple[type[PipelineTaskConfig], ...], dct: dict[str, Any], **kwargs: Any ): # This overrides the default init to drop the kwargs argument. 
Python # metaclasses will have this argument set if any kwargs are passes at diff --git a/python/lsst/pipe/base/executionButlerBuilder.py b/python/lsst/pipe/base/executionButlerBuilder.py index 6a571a864..6c462a693 100644 --- a/python/lsst/pipe/base/executionButlerBuilder.py +++ b/python/lsst/pipe/base/executionButlerBuilder.py @@ -24,7 +24,7 @@ import io from collections import defaultdict -from typing import Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union +from collections.abc import Callable, Iterable, Mapping from lsst.daf.butler import Butler, Config, DatasetRef, DatasetType, Registry from lsst.daf.butler.core.repoRelocation import BUTLER_ROOT_TAG @@ -35,11 +35,11 @@ from .graph import QuantumGraph -DataSetTypeRefMap = Mapping[DatasetType, Set[DatasetRef]] +DataSetTypeRefMap = Mapping[DatasetType, set[DatasetRef]] def _validate_dataset_type( - candidate: DatasetType, previous: dict[Union[str, DatasetType], DatasetType], registry: Registry + candidate: DatasetType, previous: dict[str | DatasetType, DatasetType], registry: Registry ) -> DatasetType: """Check the dataset types and return a consistent variant if there are different compatible options. @@ -119,13 +119,13 @@ def _validate_dataset_type( def _accumulate( butler: Butler, graph: QuantumGraph, -) -> Tuple[Set[DatasetRef], DataSetTypeRefMap]: +) -> tuple[set[DatasetRef], DataSetTypeRefMap]: # accumulate the DatasetRefs that will be transferred to the execution # registry # exports holds all the existing data that will be migrated to the # execution butler - exports: Set[DatasetRef] = set() + exports: set[DatasetRef] = set() # inserts is the mapping of DatasetType to dataIds for what is to be # inserted into the registry. These are the products that are expected @@ -137,7 +137,7 @@ def _accumulate( # must we must ensure that only a single dataset type definition is # accumulated in the loop below. This data structure caches every dataset # type encountered and stores the compatible alternative. - datasetTypes: dict[Union[str, DatasetType], DatasetType] = {} + datasetTypes: dict[str | DatasetType, DatasetType] = {} # Find the initOutput refs. initOutputRefs = list(graph.globalInitOutputRefs()) @@ -155,10 +155,10 @@ def _accumulate( # Output references may be resolved even if they do not exist. Find all # actually existing refs. - check_refs: Set[DatasetRef] = set() + check_refs: set[DatasetRef] = set() for quantum in (n.quantum for n in graph): for attrName in ("initInputs", "inputs", "outputs"): - attr: Mapping[DatasetType, Union[DatasetRef, List[DatasetRef]]] = getattr(quantum, attrName) + attr: Mapping[DatasetType, DatasetRef | list[DatasetRef]] = getattr(quantum, attrName) for type, refs in attr.items(): # This if block is because init inputs has a different # signature for its items @@ -229,7 +229,7 @@ def _discoverCollections(butler: Butler, collections: Iterable[str]) -> set[str] return collections -def _export(butler: Butler, collections: Optional[Iterable[str]], inserts: DataSetTypeRefMap) -> io.StringIO: +def _export(butler: Butler, collections: Iterable[str] | None, inserts: DataSetTypeRefMap) -> io.StringIO: # This exports relevant dimension records and collections using daf butler # objects, however it reaches in deep and does not use the public methods # so that it can export it to a string buffer and skip disk access. 
This @@ -267,7 +267,7 @@ def _setupNewButler( butler: Butler, outputLocation: ResourcePath, dirExists: bool, - datastoreRoot: Optional[ResourcePath] = None, + datastoreRoot: ResourcePath | None = None, ) -> Butler: """Set up the execution butler @@ -338,8 +338,8 @@ def _import( yamlBuffer: io.StringIO, newButler: Butler, inserts: DataSetTypeRefMap, - run: Optional[str], - butlerModifier: Optional[Callable[[Butler], Butler]], + run: str | None, + butlerModifier: Callable[[Butler], Butler] | None, ) -> Butler: # This method takes the exports from the existing butler, imports # them into the newly created butler, and then inserts the datasets @@ -371,12 +371,12 @@ def buildExecutionButler( butler: Butler, graph: QuantumGraph, outputLocation: ResourcePathExpression, - run: Optional[str], + run: str | None, *, clobber: bool = False, - butlerModifier: Optional[Callable[[Butler], Butler]] = None, - collections: Optional[Iterable[str]] = None, - datastoreRoot: Optional[ResourcePathExpression] = None, + butlerModifier: Callable[[Butler], Butler] | None = None, + collections: Iterable[str] | None = None, + datastoreRoot: ResourcePathExpression | None = None, transfer: str = "auto", ) -> Butler: r"""Create an execution butler. diff --git a/python/lsst/pipe/base/formatters/pexConfig.py b/python/lsst/pipe/base/formatters/pexConfig.py index 8480ae974..0c3e0477d 100644 --- a/python/lsst/pipe/base/formatters/pexConfig.py +++ b/python/lsst/pipe/base/formatters/pexConfig.py @@ -24,7 +24,7 @@ __all__ = ("PexConfigFormatter",) import os.path -from typing import Any, Optional, Type +from typing import Any from lsst.daf.butler.formatters.file import FileFormatter from lsst.pex.config import Config @@ -37,7 +37,7 @@ class PexConfigFormatter(FileFormatter): extension = ".py" - def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any: + def _readFile(self, path: str, pytype: type[Any] | None = None) -> Any: """Read a pex.config.Config instance from the given file. Parameters diff --git a/python/lsst/pipe/base/graph/_implDetails.py b/python/lsst/pipe/base/graph/_implDetails.py index a1e0aa15c..3f27faec9 100644 --- a/python/lsst/pipe/base/graph/_implDetails.py +++ b/python/lsst/pipe/base/graph/_implDetails.py @@ -23,8 +23,9 @@ __all__ = ("_DatasetTracker", "DatasetTypeName", "_pruner") from collections import defaultdict +from collections.abc import Iterable from itertools import chain -from typing import DefaultDict, Dict, Generic, Iterable, List, NewType, Optional, Set, TypeVar +from typing import Generic, NewType, TypeVar import networkx as nx from lsst.daf.butler import DatasetRef, DatasetType, NamedKeyDict, Quantum @@ -57,11 +58,11 @@ class _DatasetTracker(Generic[_T, _U]): """ def __init__(self, createInverse: bool = False): - self._producers: Dict[_T, _U] = {} - self._consumers: DefaultDict[_T, Set[_U]] = defaultdict(set) + self._producers: dict[_T, _U] = {} + self._consumers: defaultdict[_T, set[_U]] = defaultdict(set) self._createInverse = createInverse if self._createInverse: - self._itemsDict: DefaultDict[_U, Set[_T]] = defaultdict(set) + self._itemsDict: defaultdict[_U, set[_T]] = defaultdict(set) def addProducer(self, key: _T, value: _U) -> None: """Add a key which is produced by some value. 
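Since Python 3.9 the abstract container types (`Iterable`, `Mapping`, `Callable`, ...) are subscriptable directly from `collections.abc`, so the `typing` re-exports are no longer needed, and optional callables such as ``butlerModifier`` above are spelled `Callable[[Butler], Butler] | None`. A self-contained sketch of that idiom with invented names, not code from this package:

    from __future__ import annotations

    from collections.abc import Callable, Iterable


    def apply_modifier(values: Iterable[int], modifier: Callable[[int], int] | None = None) -> list[int]:
        """Apply an optional per-item modifier; `None` means leave values unchanged."""
        return [v if modifier is None else modifier(v) for v in values]


    assert apply_modifier([1, 2, 3]) == [1, 2, 3]
    assert apply_modifier([1, 2, 3], modifier=lambda v: v * 10) == [10, 20, 30]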
@@ -135,7 +136,7 @@ def removeConsumer(self, key: _T, value: _U) -> None: if result_inverse := self._itemsDict.get(value): result_inverse.discard(key) - def getConsumers(self, key: _T) -> Set[_U]: + def getConsumers(self, key: _T) -> set[_U]: """Return all values associated with the consumption of the supplied key. @@ -146,7 +147,7 @@ def getConsumers(self, key: _T) -> Set[_U]: """ return self._consumers.get(key, set()) - def getProducer(self, key: _T) -> Optional[_U]: + def getProducer(self, key: _T) -> _U | None: """Return the value associated with the consumption of the supplied key. @@ -171,7 +172,7 @@ def getAll(self, key: _T) -> set[_U]: return self.getConsumers(key).union(x for x in (self.getProducer(key),) if x is not None) @property - def inverse(self) -> Optional[DefaultDict[_U, Set[_T]]]: + def inverse(self) -> defaultdict[_U, set[_T]] | None: """Return the inverse mapping if class was instantiated to create an inverse, else return None. """ @@ -204,7 +205,7 @@ def makeNetworkXGraph(self) -> nx.DiGraph: graph.add_edge(producer, consumer) return graph - def keys(self) -> Set[_T]: + def keys(self) -> set[_T]: """Return all tracked keys.""" return self._producers.keys() | self._consumers.keys() @@ -240,7 +241,7 @@ def _pruner( datasetRefDict: _DatasetTracker[DatasetRef, QuantumNode], refsToRemove: Iterable[DatasetRef], *, - alreadyPruned: Optional[Set[QuantumNode]] = None, + alreadyPruned: set[QuantumNode] | None = None, ) -> None: r"""Prune supplied dataset refs out of ``datasetRefDict`` container, recursing to additional nodes dependant on pruned refs. @@ -295,7 +296,7 @@ def _pruner( toRemove = ref tmpRefs = set(connectionRefs).difference((toRemove,)) - tmpConnections = NamedKeyDict[DatasetType, List[DatasetRef]](node.quantum.inputs.items()) + tmpConnections = NamedKeyDict[DatasetType, list[DatasetRef]](node.quantum.inputs.items()) tmpConnections[toRemove.datasetType] = list(tmpRefs) helper = AdjustQuantumHelper(inputs=tmpConnections, outputs=node.quantum.outputs) assert node.quantum.dataId is not None, ( diff --git a/python/lsst/pipe/base/graph/_loadHelpers.py b/python/lsst/pipe/base/graph/_loadHelpers.py index 20ab1752f..5d112d2a8 100644 --- a/python/lsst/pipe/base/graph/_loadHelpers.py +++ b/python/lsst/pipe/base/graph/_loadHelpers.py @@ -23,11 +23,12 @@ __all__ = ("LoadHelper",) import struct -from contextlib import ExitStack +from collections.abc import Iterable +from contextlib import AbstractContextManager, ExitStack from dataclasses import dataclass from io import BufferedRandom, BytesIO from types import TracebackType -from typing import TYPE_CHECKING, BinaryIO, ContextManager, Iterable, Optional, Set, Type, Union +from typing import TYPE_CHECKING, BinaryIO from uuid import UUID from lsst.daf.butler import DimensionUniverse @@ -39,7 +40,7 @@ @dataclass -class LoadHelper(ContextManager["LoadHelper"]): +class LoadHelper(AbstractContextManager["LoadHelper"]): """Helper class to assist with selecting the appropriate loader and managing any contexts that may be needed. @@ -47,7 +48,7 @@ class LoadHelper(ContextManager["LoadHelper"]): to be a valid `QuantumGraph` save file. 
""" - uri: Union[ResourcePath, BinaryIO] + uri: ResourcePath | BinaryIO """ResourcePath object from which the `QuantumGraph` is to be loaded """ minimumVersion: int @@ -59,7 +60,7 @@ class LoadHelper(ContextManager["LoadHelper"]): """ def __post_init__(self) -> None: - self._resourceHandle: Optional[ResourceHandleProtocol] = None + self._resourceHandle: ResourceHandleProtocol | None = None self._exitStack = ExitStack() def _initialize(self) -> None: @@ -150,9 +151,9 @@ def _validateSave(self, magic: bytes, versionBytes: bytes) -> int: def load( self, - universe: Optional[DimensionUniverse] = None, - nodes: Optional[Iterable[Union[UUID, str]]] = None, - graphID: Optional[str] = None, + universe: DimensionUniverse | None = None, + nodes: Iterable[UUID | str] | None = None, + graphID: str | None = None, ) -> QuantumGraph: """Load in the specified nodes from the graph. @@ -201,7 +202,7 @@ def load( if graphID is not None and headerInfo._buildId != graphID: raise ValueError("graphID does not match that of the graph being loaded") # Read in specified nodes, or all the nodes - nodeSet: Set[UUID] + nodeSet: set[UUID] if nodes is None: nodeSet = set(headerInfo.map.keys()) else: @@ -256,15 +257,15 @@ def __enter__(self) -> "LoadHelper": def __exit__( self, - type: Optional[Type[BaseException]], - value: Optional[BaseException], - traceback: Optional[TracebackType], + type: type[BaseException] | None, + value: BaseException | None, + traceback: TracebackType | None, ) -> None: assert self._resourceHandle is not None self._exitStack.close() self._resourceHandle = None - def readHeader(self) -> Optional[str]: + def readHeader(self) -> str | None: with self as handle: result = handle.deserializer.unpackHeader(self._readBytes(*self.headerBytesRange)) return result diff --git a/python/lsst/pipe/base/graph/_versionDeserializers.py b/python/lsst/pipe/base/graph/_versionDeserializers.py index 8c51aa443..8ea984637 100644 --- a/python/lsst/pipe/base/graph/_versionDeserializers.py +++ b/python/lsst/pipe/base/graph/_versionDeserializers.py @@ -29,21 +29,10 @@ import uuid from abc import ABC, abstractmethod from collections import defaultdict +from collections.abc import Callable from dataclasses import dataclass from types import SimpleNamespace -from typing import ( - TYPE_CHECKING, - Callable, - ClassVar, - DefaultDict, - Dict, - List, - Optional, - Set, - Tuple, - Type, - cast, -) +from typing import TYPE_CHECKING, ClassVar, cast import networkx as nx from lsst.daf.butler import ( @@ -72,7 +61,7 @@ class StructSizeDescriptor: (number of bytes) of whatever the formatter string is for a deserializer. """ - def __get__(self, inst: Optional[DeserializerBase], owner: Type[DeserializerBase]) -> int: + def __get__(self, inst: DeserializerBase | None, owner: type[DeserializerBase]) -> int: return struct.calcsize(owner.FMT_STRING()) @@ -93,7 +82,7 @@ def __init_subclass__(cls) -> None: cls.structSize = StructSizeDescriptor() super().__init_subclass__() - def unpackHeader(self, rawHeader: bytes) -> Optional[str]: + def unpackHeader(self, rawHeader: bytes) -> str | None: """Transform the raw bytes corresponding to the header of a save into a string of the header information. @@ -137,7 +126,7 @@ def constructGraph( self, nodes: set[uuid.UUID], _readBytes: Callable[[int, int], bytes], - universe: Optional[DimensionUniverse] = None, + universe: DimensionUniverse | None = None, ) -> QuantumGraph: """Construct a graph from the deserialized information. 
@@ -213,20 +202,20 @@ def readHeaderInfo(self, rawHeader: bytes) -> SimpleNamespace: self.returnValue = returnValue return returnValue - def unpackHeader(self, rawHeader: bytes) -> Optional[str]: + def unpackHeader(self, rawHeader: bytes) -> str | None: return None def constructGraph( self, nodes: set[uuid.UUID], _readBytes: Callable[[int, int], bytes], - universe: Optional[DimensionUniverse] = None, + universe: DimensionUniverse | None = None, ) -> QuantumGraph: # need to import here to avoid cyclic imports from . import QuantumGraph - quanta: DefaultDict[TaskDef, Set[Quantum]] = defaultdict(set) - quantumToNodeId: Dict[Quantum, uuid.UUID] = {} + quanta: defaultdict[TaskDef, set[Quantum]] = defaultdict(set) + quantumToNodeId: dict[Quantum, uuid.UUID] = {} loadedTaskDef = {} # loop over the nodes specified above for node in nodes: @@ -347,20 +336,20 @@ def readHeaderInfo(self, rawHeader: bytes) -> SimpleNamespace: self.returnValue = returnValue return returnValue - def unpackHeader(self, rawHeader: bytes) -> Optional[str]: + def unpackHeader(self, rawHeader: bytes) -> str | None: return lzma.decompress(rawHeader).decode() def constructGraph( self, nodes: set[uuid.UUID], _readBytes: Callable[[int, int], bytes], - universe: Optional[DimensionUniverse] = None, + universe: DimensionUniverse | None = None, ) -> QuantumGraph: # need to import here to avoid cyclic imports from . import QuantumGraph - quanta: DefaultDict[TaskDef, Set[Quantum]] = defaultdict(set) - quantumToNodeId: Dict[Quantum, uuid.UUID] = {} + quanta: defaultdict[TaskDef, set[Quantum]] = defaultdict(set) + quantumToNodeId: dict[Quantum, uuid.UUID] = {} loadedTaskDef = {} # loop over the nodes specified above for node in nodes: @@ -528,26 +517,26 @@ def readHeaderInfo(self, rawHeader: bytes) -> SimpleNamespace: self.infoMappings = infoMappings return infoMappings - def unpackHeader(self, rawHeader: bytes) -> Optional[str]: + def unpackHeader(self, rawHeader: bytes) -> str | None: return lzma.decompress(rawHeader).decode() def constructGraph( self, nodes: set[uuid.UUID], _readBytes: Callable[[int, int], bytes], - universe: Optional[DimensionUniverse] = None, + universe: DimensionUniverse | None = None, ) -> QuantumGraph: # need to import here to avoid cyclic imports from . 
import QuantumGraph graph = nx.DiGraph() - loadedTaskDef: Dict[str, TaskDef] = {} + loadedTaskDef: dict[str, TaskDef] = {} container = {} datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True) - taskToQuantumNode: DefaultDict[TaskDef, Set[QuantumNode]] = defaultdict(set) - recontitutedDimensions: Dict[int, Tuple[str, DimensionRecord]] = {} - initInputRefs: Dict[TaskDef, List[DatasetRef]] = {} - initOutputRefs: Dict[TaskDef, List[DatasetRef]] = {} + taskToQuantumNode: defaultdict[TaskDef, set[QuantumNode]] = defaultdict(set) + recontitutedDimensions: dict[int, tuple[str, DimensionRecord]] = {} + initInputRefs: dict[TaskDef, list[DatasetRef]] = {} + initOutputRefs: dict[TaskDef, list[DatasetRef]] = {} if universe is not None: if not universe.isCompatibleWith(self.infoMappings.universe): @@ -582,7 +571,7 @@ def constructGraph( # bytes are compressed, so decompress them taskDefDump = json.loads(lzma.decompress(_readBytes(start, stop))) - taskClass: Type[PipelineTask] = doImportType(taskDefDump["taskName"]) + taskClass: type[PipelineTask] = doImportType(taskDefDump["taskName"]) config: PipelineTaskConfig = taskClass.ConfigClass() config.loadFromStream(taskDefDump["config"]) # Rebuild TaskDef @@ -659,7 +648,7 @@ def constructGraph( return newGraph -DESERIALIZER_MAP: dict[int, Type[DeserializerBase]] = { +DESERIALIZER_MAP: dict[int, type[DeserializerBase]] = { 1: DeserializerV1, 2: DeserializerV2, 3: DeserializerV3, diff --git a/python/lsst/pipe/base/graph/graph.py b/python/lsst/pipe/base/graph/graph.py index eb9d2bc52..fda548da6 100644 --- a/python/lsst/pipe/base/graph/graph.py +++ b/python/lsst/pipe/base/graph/graph.py @@ -32,26 +32,10 @@ import uuid import warnings from collections import defaultdict, deque +from collections.abc import Generator, Iterable, Mapping, MutableMapping from itertools import chain from types import MappingProxyType -from typing import ( - Any, - BinaryIO, - DefaultDict, - Deque, - Dict, - FrozenSet, - Generator, - Iterable, - List, - Mapping, - MutableMapping, - Optional, - Set, - Tuple, - TypeVar, - Union, -) +from typing import Any, BinaryIO, TypeVar import networkx as nx from lsst.daf.butler import DatasetRef, DatasetType, DimensionRecordsAccumulator, DimensionUniverse, Quantum @@ -146,14 +130,14 @@ class QuantumGraph: def __init__( self, - quanta: Mapping[TaskDef, Set[Quantum]], - metadata: Optional[Mapping[str, Any]] = None, - pruneRefs: Optional[Iterable[DatasetRef]] = None, - universe: Optional[DimensionUniverse] = None, - initInputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, - initOutputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, - globalInitOutputs: Optional[Iterable[DatasetRef]] = None, - registryDatasetTypes: Optional[Iterable[DatasetType]] = None, + quanta: Mapping[TaskDef, set[Quantum]], + metadata: Mapping[str, Any] | None = None, + pruneRefs: Iterable[DatasetRef] | None = None, + universe: DimensionUniverse | None = None, + initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None, + initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None, + globalInitOutputs: Iterable[DatasetRef] | None = None, + registryDatasetTypes: Iterable[DatasetType] | None = None, ): self._buildGraphs( quanta, @@ -168,17 +152,17 @@ def __init__( def _buildGraphs( self, - quanta: Mapping[TaskDef, Set[Quantum]], + quanta: Mapping[TaskDef, set[Quantum]], *, - _quantumToNodeId: Optional[Mapping[Quantum, uuid.UUID]] = None, - _buildId: Optional[BuildId] = None, - metadata: Optional[Mapping[str, Any]] = None, - 
pruneRefs: Optional[Iterable[DatasetRef]] = None, - universe: Optional[DimensionUniverse] = None, - initInputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, - initOutputs: Optional[Mapping[TaskDef, Iterable[DatasetRef]]] = None, - globalInitOutputs: Optional[Iterable[DatasetRef]] = None, - registryDatasetTypes: Optional[Iterable[DatasetType]] = None, + _quantumToNodeId: Mapping[Quantum, uuid.UUID] | None = None, + _buildId: BuildId | None = None, + metadata: Mapping[str, Any] | None = None, + pruneRefs: Iterable[DatasetRef] | None = None, + universe: DimensionUniverse | None = None, + initInputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None, + initOutputs: Mapping[TaskDef, Iterable[DatasetRef]] | None = None, + globalInitOutputs: Iterable[DatasetRef] | None = None, + registryDatasetTypes: Iterable[DatasetType] | None = None, ) -> None: """Build the graph that is used to store the relation between tasks, and the graph that holds the relations between quanta @@ -191,8 +175,8 @@ def _buildGraphs( self._datasetDict = _DatasetTracker[DatasetTypeName, TaskDef](createInverse=True) self._datasetRefDict = _DatasetTracker[DatasetRef, QuantumNode]() - self._nodeIdMap: Dict[uuid.UUID, QuantumNode] = {} - self._taskToQuantumNode: MutableMapping[TaskDef, Set[QuantumNode]] = defaultdict(set) + self._nodeIdMap: dict[uuid.UUID, QuantumNode] = {} + self._taskToQuantumNode: MutableMapping[TaskDef, set[QuantumNode]] = defaultdict(set) for taskDef, quantumSet in quanta.items(): connections = taskDef.connections @@ -258,13 +242,13 @@ def _buildGraphs( if pruneRefs is not None: # track what refs were pruned and prune the graph - prunes: Set[QuantumNode] = set() + prunes: set[QuantumNode] = set() _pruner(self._datasetRefDict, pruneRefs, alreadyPruned=prunes) # recreate the taskToQuantumNode dict removing nodes that have been # pruned. Keep track of task defs that now have no QuantumNodes - emptyTasks: Set[str] = set() - newTaskToQuantumNode: DefaultDict[TaskDef, Set[QuantumNode]] = defaultdict(set) + emptyTasks: set[str] = set() + newTaskToQuantumNode: defaultdict[TaskDef, set[QuantumNode]] = defaultdict(set) # accumulate all types types_ = set() # tracker for any pruneRefs that have caused tasks to have no nodes @@ -319,10 +303,10 @@ def _buildGraphs( # insertion self._taskToQuantumNode = dict(self._taskToQuantumNode.items()) - self._initInputRefs: Dict[TaskDef, List[DatasetRef]] = {} - self._initOutputRefs: Dict[TaskDef, List[DatasetRef]] = {} - self._globalInitOutputRefs: List[DatasetRef] = [] - self._registryDatasetTypes: List[DatasetType] = [] + self._initInputRefs: dict[TaskDef, list[DatasetRef]] = {} + self._initOutputRefs: dict[TaskDef, list[DatasetRef]] = {} + self._globalInitOutputRefs: list[DatasetRef] = [] + self._registryDatasetTypes: list[DatasetType] = [] if initInputs is not None: self._initInputRefs = {taskDef: list(refs) for taskDef, refs in initInputs.items()} if initOutputs is not None: @@ -369,7 +353,7 @@ def outputQuanta(self) -> Iterable[QuantumNode]: return [q for q, n in self._connectedQuanta.out_degree if n == 0] @property - def allDatasetTypes(self) -> Tuple[DatasetTypeName, ...]: + def allDatasetTypes(self) -> tuple[DatasetTypeName, ...]: """All the data set type names that are present in the graph (`tuple` [`str`]). 
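The container annotations above follow the same rule: `DefaultDict`, `Deque`, `Dict` and `Set` from `typing` are replaced with `defaultdict`, `deque`, `dict` and `set`, so the class used at runtime and the class named in the annotation are one and the same. A short illustrative sketch with invented names:

    from __future__ import annotations

    from collections import defaultdict, deque


    def group_by_parity(values: list[int]) -> defaultdict[str, deque[int]]:
        """Split integers into 'even' and 'odd' queues, preserving input order."""
        groups: defaultdict[str, deque[int]] = defaultdict(deque)
        for v in values:
            groups["even" if v % 2 == 0 else "odd"].append(v)
        return groups


    assert list(group_by_parity([1, 2, 3, 4])["even"]) == [2, 4]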
@@ -405,7 +389,7 @@ def pruneGraphFromRefs(self: _T, refs: Iterable[DatasetRef]) -> _T: quantumMap[node.taskDef].add(node.quantum) # convert to standard dict to prevent accidental key insertion - quantumDict: Dict[TaskDef, Set[Quantum]] = dict(quantumMap.items()) + quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items()) # This should not change set of tasks in a graph, so we can keep the # same registryDatasetTypes as in the original graph. @@ -441,7 +425,7 @@ def getQuantumNodeByNodeId(self, nodeId: uuid.UUID) -> QuantumNode: """ return self._nodeIdMap[nodeId] - def getQuantaForTask(self, taskDef: TaskDef) -> FrozenSet[Quantum]: + def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]: """Return all the `~lsst.daf.butler.Quantum` associated with a `TaskDef`. @@ -477,7 +461,7 @@ def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int: """ return len(self._taskToQuantumNode.get(taskDef, ())) - def getNodesForTask(self, taskDef: TaskDef) -> FrozenSet[QuantumNode]: + def getNodesForTask(self, taskDef: TaskDef) -> frozenset[QuantumNode]: r"""Return all the `QuantumNode`\s associated with a `TaskDef`. Parameters @@ -519,7 +503,7 @@ def findTasksWithInput(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskD """ return (c for c in self._datasetDict.getConsumers(datasetTypeName)) - def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> Optional[TaskDef]: + def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None: """Find all tasks that have the specified dataset type name as an output. @@ -567,7 +551,7 @@ def tasksWithDSType(self, datasetTypeName: DatasetTypeName) -> Iterable[TaskDef] """ return self._datasetDict.getAll(datasetTypeName) - def findTaskDefByName(self, taskName: str) -> List[TaskDef]: + def findTaskDefByName(self, taskName: str) -> list[TaskDef]: """Determine which `TaskDef` objects in this graph are associated with a `str` representing a task name (looks at the ``taskName`` property of `TaskDef` objects). @@ -594,7 +578,7 @@ def findTaskDefByName(self, taskName: str) -> List[TaskDef]: results.append(task) return results - def findTaskDefByLabel(self, label: str) -> Optional[TaskDef]: + def findTaskDefByLabel(self, label: str) -> TaskDef | None: """Determine which `TaskDef` objects in this graph are associated with a `str` representing a tasks label. @@ -613,7 +597,7 @@ def findTaskDefByLabel(self, label: str) -> Optional[TaskDef]: return task return None - def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> Set[Quantum]: + def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> set[Quantum]: r"""Return all the `~lsst.daf.butler.Quantum` that contain a specified `DatasetTypeName`. @@ -637,7 +621,7 @@ def findQuantaWithDSType(self, datasetTypeName: DatasetTypeName) -> Set[Quantum] """ tasks = self._datasetDict.getAll(datasetTypeName) - result: Set[Quantum] = set() + result: set[Quantum] = set() result = result.union(quantum for task in tasks for quantum in self.getQuantaForTask(task)) return result @@ -659,7 +643,7 @@ def checkQuantumInGraph(self, quantum: Quantum) -> bool: return True return False - def writeDotGraph(self, output: Union[str, io.BufferedIOBase]) -> None: + def writeDotGraph(self, output: str | io.BufferedIOBase) -> None: """Write out the graph as a dot graph. 
Parameters @@ -669,7 +653,7 @@ def writeDotGraph(self, output: Union[str, io.BufferedIOBase]) -> None: """ write_dot(self._connectedQuanta, output) - def subset(self: _T, nodes: Union[QuantumNode, Iterable[QuantumNode]]) -> _T: + def subset(self: _T, nodes: QuantumNode | Iterable[QuantumNode]) -> _T: """Create a new graph object that contains the subset of the nodes specified as input. Node number is preserved. @@ -709,7 +693,7 @@ def subset(self: _T, nodes: Union[QuantumNode, Iterable[QuantumNode]]) -> _T: ] # convert to standard dict to prevent accidental key insertion - quantumDict: Dict[TaskDef, Set[Quantum]] = dict(quantumMap.items()) + quantumDict: dict[TaskDef, set[Quantum]] = dict(quantumMap.items()) # Create an empty graph, and then populate it with custom mapping newInst = type(self)({}, universe=self._universe) # TODO: Do we need to copy initInputs/initOutputs? @@ -724,7 +708,7 @@ def subset(self: _T, nodes: Union[QuantumNode, Iterable[QuantumNode]]) -> _T: ) return newInst - def subsetToConnected(self: _T) -> Tuple[_T, ...]: + def subsetToConnected(self: _T) -> tuple[_T, ...]: """Generate a list of subgraphs where each is connected. Returns @@ -737,7 +721,7 @@ def subsetToConnected(self: _T) -> Tuple[_T, ...]: for connectedSet in nx.weakly_connected_components(self._connectedQuanta) ) - def determineInputsToQuantumNode(self, node: QuantumNode) -> Set[QuantumNode]: + def determineInputsToQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: """Return a set of `QuantumNode` that are direct inputs to a specified node. @@ -753,7 +737,7 @@ def determineInputsToQuantumNode(self, node: QuantumNode) -> Set[QuantumNode]: """ return set(pred for pred in self._connectedQuanta.predecessors(node)) - def determineOutputsOfQuantumNode(self, node: QuantumNode) -> Set[QuantumNode]: + def determineOutputsOfQuantumNode(self, node: QuantumNode) -> set[QuantumNode]: """Return a set of `QuantumNode` that are direct outputs of a specified node. @@ -806,7 +790,7 @@ def determineAncestorsOfQuantumNode(self: _T, node: QuantumNode) -> _T: predecessorNodes.add(node) return self.subset(predecessorNodes) - def findCycle(self) -> List[Tuple[QuantumNode, QuantumNode]]: + def findCycle(self) -> list[tuple[QuantumNode, QuantumNode]]: """Check a graph for the presense of cycles and returns the edges of any cycles found, or an empty list if there is no cycle. @@ -837,7 +821,7 @@ def saveUri(self, uri: ResourcePathExpression) -> None: path.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes @property - def metadata(self) -> Optional[MappingProxyType[str, Any]]: + def metadata(self) -> MappingProxyType[str, Any] | None: """Extra data carried with the graph (mapping [`str`] or `None`). The mapping is a dynamic view of this object's metadata. Values should @@ -847,7 +831,7 @@ def metadata(self) -> Optional[MappingProxyType[str, Any]]: return None return MappingProxyType(self._metadata) - def initInputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: + def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: """Return DatasetRefs for a given task InitInputs. Parameters @@ -863,7 +847,7 @@ def initInputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: """ return self._initInputRefs.get(taskDef) - def initOutputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: + def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: """Return DatasetRefs for a given task InitOutputs. 
Parameters @@ -880,7 +864,7 @@ def initOutputRefs(self, taskDef: TaskDef) -> Optional[List[DatasetRef]]: """ return self._initOutputRefs.get(taskDef) - def globalInitOutputRefs(self) -> List[DatasetRef]: + def globalInitOutputRefs(self) -> list[DatasetRef]: """Return DatasetRefs for global InitOutputs. Returns @@ -890,7 +874,7 @@ def globalInitOutputRefs(self) -> List[DatasetRef]: """ return self._globalInitOutputRefs - def registryDatasetTypes(self) -> List[DatasetType]: + def registryDatasetTypes(self) -> list[DatasetType]: """Return dataset types used by this graph, their definitions match dataset types from registry. @@ -905,9 +889,9 @@ def registryDatasetTypes(self) -> List[DatasetType]: def loadUri( cls, uri: ResourcePathExpression, - universe: Optional[DimensionUniverse] = None, - nodes: Optional[Iterable[uuid.UUID]] = None, - graphID: Optional[BuildId] = None, + universe: DimensionUniverse | None = None, + nodes: Iterable[uuid.UUID] | None = None, + graphID: BuildId | None = None, minimumVersion: int = 3, ) -> QuantumGraph: """Read `QuantumGraph` from a URI. @@ -986,7 +970,7 @@ def loadUri( return qgraph @classmethod - def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> Optional[str]: + def readHeader(cls, uri: ResourcePathExpression, minimumVersion: int = 3) -> str | None: """Read the header of a `QuantumGraph` pointed to by the uri parameter and return it as a string. @@ -1041,15 +1025,15 @@ def save(self, file: BinaryIO) -> None: buffer = self._buildSaveObject() file.write(buffer) # type: ignore # Ignore because bytearray is safe to use in place of bytes - def _buildSaveObject(self, returnHeader: bool = False) -> Union[bytearray, Tuple[bytearray, Dict]]: + def _buildSaveObject(self, returnHeader: bool = False) -> bytearray | tuple[bytearray, dict]: # make some containers - jsonData: Deque[bytes] = deque() + jsonData: deque[bytes] = deque() # node map is a list because json does not accept mapping keys that # are not strings, so we store a list of key, value pairs that will # be converted to a mapping on load nodeMap = [] taskDefMap = {} - headerData: Dict[str, Any] = {} + headerData: dict[str, Any] = {} # Store the QauntumGraph BuildId, this will allow validating BuildIds # at load time, prior to loading any QuantumNodes. Name chosen for @@ -1071,7 +1055,7 @@ def _buildSaveObject(self, returnHeader: bool = False) -> Union[bytearray, Tuple for taskDef in self.taskGraph: # compressing has very little impact on saving or load time, but # a large impact on on disk size, so it is worth doing - taskDescription: Dict[str, Any] = {} + taskDescription: dict[str, Any] = {} # save the fully qualified name. taskDescription["taskName"] = get_full_type_name(taskDef.taskClass) # save the config as a text stream that will be un-persisted on the @@ -1199,9 +1183,9 @@ def _buildSaveObject(self, returnHeader: bool = False) -> Union[bytearray, Tuple def load( cls, file: BinaryIO, - universe: Optional[DimensionUniverse] = None, - nodes: Optional[Iterable[uuid.UUID]] = None, - graphID: Optional[BuildId] = None, + universe: DimensionUniverse | None = None, + nodes: Iterable[uuid.UUID] | None = None, + graphID: BuildId | None = None, minimumVersion: int = 3, ) -> QuantumGraph: """Read `QuantumGraph` from a file that was made by `save`. @@ -1372,7 +1356,7 @@ def __getstate__(self) -> dict: reconstructed with this information, and it preserves the ordering of the graph nodes. 
""" - universe: Optional[DimensionUniverse] = None + universe: DimensionUniverse | None = None for node in self: dId = node.quantum.dataId if dId is None: diff --git a/python/lsst/pipe/base/graph/quantumNode.py b/python/lsst/pipe/base/graph/quantumNode.py index 05bf97069..2c3c96067 100644 --- a/python/lsst/pipe/base/graph/quantumNode.py +++ b/python/lsst/pipe/base/graph/quantumNode.py @@ -24,7 +24,7 @@ import uuid from dataclasses import dataclass -from typing import Any, Dict, NewType, Optional, Tuple +from typing import Any, NewType from lsst.daf.butler import ( DatasetRef, @@ -120,7 +120,7 @@ def __repr__(self) -> str: f"{self.__class__.__name__}(quantum={self.quantum}, taskDef={self.taskDef}, nodeId={self.nodeId})" ) - def to_simple(self, accumulator: Optional[DimensionRecordsAccumulator] = None) -> SerializedQuantumNode: + def to_simple(self, accumulator: DimensionRecordsAccumulator | None = None) -> SerializedQuantumNode: return SerializedQuantumNode( quantum=self.quantum.to_simple(accumulator=accumulator), taskLabel=self.taskDef.label, @@ -131,9 +131,9 @@ def to_simple(self, accumulator: Optional[DimensionRecordsAccumulator] = None) - def from_simple( cls, simple: SerializedQuantumNode, - taskDefMap: Dict[str, TaskDef], + taskDefMap: dict[str, TaskDef], universe: DimensionUniverse, - recontitutedDimensions: Optional[Dict[int, Tuple[str, DimensionRecord]]] = None, + recontitutedDimensions: dict[int, tuple[str, DimensionRecord]] | None = None, ) -> QuantumNode: return QuantumNode( quantum=Quantum.from_simple( @@ -150,7 +150,7 @@ class SerializedQuantumNode(BaseModel): nodeId: uuid.UUID @classmethod - def direct(cls, *, quantum: Dict[str, Any], taskLabel: str, nodeId: str) -> SerializedQuantumNode: + def direct(cls, *, quantum: dict[str, Any], taskLabel: str, nodeId: str) -> SerializedQuantumNode: node = SerializedQuantumNode.__new__(cls) setter = object.__setattr__ setter(node, "quantum", SerializedQuantum.direct(**quantum)) diff --git a/python/lsst/pipe/base/graphBuilder.py b/python/lsst/pipe/base/graphBuilder.py index 7701a81fe..11b38dc76 100644 --- a/python/lsst/pipe/base/graphBuilder.py +++ b/python/lsst/pipe/base/graphBuilder.py @@ -34,7 +34,7 @@ from collections.abc import Collection, Iterable, Iterator, Mapping from contextlib import contextmanager from dataclasses import dataclass -from typing import Any, Optional +from typing import Any from lsst.daf.butler import ( CollectionType, @@ -414,7 +414,7 @@ def __repr__(self) -> str: inputs to this quantum. """ - def makeQuantum(self, datastore_records: Optional[Mapping[str, DatastoreRecordData]] = None) -> Quantum: + def makeQuantum(self, datastore_records: Mapping[str, DatastoreRecordData] | None = None) -> Quantum: """Transform the scaffolding object into a true `Quantum` instance. 
Parameters @@ -445,7 +445,7 @@ def makeQuantum(self, datastore_records: Optional[Mapping[str, DatastoreRecordDa ) helper.adjust_in_place(self.task.taskDef.connections, self.task.taskDef.label, self.dataId) initInputs = self.task.initInputs.unpackSingleRefs(self.task.storage_classes) - quantum_records: Optional[Mapping[str, DatastoreRecordData]] = None + quantum_records: Mapping[str, DatastoreRecordData] | None = None if datastore_records is not None: quantum_records = {} input_refs = list(itertools.chain.from_iterable(helper.inputs.values())) @@ -574,7 +574,7 @@ def __repr__(self) -> str: def makeQuantumSet( self, missing: _DatasetDict, - datastore_records: Optional[Mapping[str, DatastoreRecordData]] = None, + datastore_records: Mapping[str, DatastoreRecordData] | None = None, ) -> set[Quantum]: """Create a `set` of `Quantum` from the information in ``self``. @@ -816,10 +816,10 @@ def connectDataIds( self, registry: Registry, collections: Any, - userQuery: Optional[str], + userQuery: str | None, externalDataId: DataCoordinate, datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL, - bind: Optional[Mapping[str, Any]] = None, + bind: Mapping[str, Any] | None = None, ) -> Iterator[DataCoordinateQueryResults]: """Query for the data IDs that connect nodes in the `QuantumGraph`. @@ -929,7 +929,7 @@ def connectDataIds( self.intermediates.items(), self.outputs.items(), ): - datasetDataId: Optional[DataCoordinate] + datasetDataId: DataCoordinate | None if (datasetDataId := dataIdCacheForRow.get(datasetType.dimensions)) is None: datasetDataId = commonDataId.subset(datasetType.dimensions) dataIdCacheForRow[datasetType.dimensions] = datasetDataId @@ -1383,8 +1383,8 @@ def resolveDatasetRefs( def makeQuantumGraph( self, registry: Registry, - metadata: Optional[Mapping[str, Any]] = None, - datastore: Optional[Datastore] = None, + metadata: Mapping[str, Any] | None = None, + datastore: Datastore | None = None, ) -> QuantumGraph: """Create a `QuantumGraph` from the quanta already present in the scaffolding data structure. @@ -1413,7 +1413,7 @@ def _make_refs(dataset_dict: _DatasetDict) -> Iterable[DatasetRef]: for holder in ref_dict.values(): yield holder.resolved_ref - datastore_records: Optional[Mapping[str, DatastoreRecordData]] = None + datastore_records: Mapping[str, DatastoreRecordData] | None = None if datastore is not None: datastore_records = datastore.export_records( itertools.chain( @@ -1556,7 +1556,7 @@ def __init__( registry: Registry, skipExistingIn: Any = None, clobberOutputs: bool = True, - datastore: Optional[Datastore] = None, + datastore: Datastore | None = None, ): self.registry = registry self.dimensions = registry.dimensions @@ -1569,10 +1569,10 @@ def makeGraph( pipeline: Pipeline | Iterable[TaskDef], collections: Any, run: str, - userQuery: Optional[str], + userQuery: str | None, datasetQueryConstraint: DatasetQueryConstraintVariant = DatasetQueryConstraintVariant.ALL, - metadata: Optional[Mapping[str, Any]] = None, - bind: Optional[Mapping[str, Any]] = None, + metadata: Mapping[str, Any] | None = None, + bind: Mapping[str, Any] | None = None, dataId: DataCoordinate | None = None, ) -> QuantumGraph: """Create execution graph for a pipeline. 
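A recurring idiom in the `GraphBuilder` signatures above is an optional mapping declared as `Mapping[str, Any] | None = None`: `None` is the sentinel and a fresh dict is built inside the function, because a literal `{}` default would be a single mutable object shared between calls. A generic sketch of that idiom, not specific to `makeGraph`:

    from __future__ import annotations

    from collections.abc import Mapping
    from typing import Any


    def build_report(name: str, metadata: Mapping[str, Any] | None = None) -> dict[str, Any]:
        """Assemble a report dict, attaching optional caller-supplied metadata."""
        report: dict[str, Any] = {"name": name}
        if metadata is not None:
            # Copy so the caller's mapping is never mutated by later edits to the report.
            report["metadata"] = dict(metadata)
        return report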
diff --git a/python/lsst/pipe/base/pipeTools.py b/python/lsst/pipe/base/pipeTools.py index 84df10ae3..d15246ada 100644 --- a/python/lsst/pipe/base/pipeTools.py +++ b/python/lsst/pipe/base/pipeTools.py @@ -31,7 +31,8 @@ # Imports of standard modules -- # ------------------------------- import itertools -from typing import TYPE_CHECKING, Iterable, List, Optional, Union +from collections.abc import Iterable +from typing import TYPE_CHECKING # ----------------------------- # Imports for other modules -- @@ -71,9 +72,7 @@ class PipelineDataCycleError(Exception): pass -def isPipelineOrdered( - pipeline: Union[Pipeline, Iterable[TaskDef]], taskFactory: Optional[TaskFactory] = None -) -> bool: +def isPipelineOrdered(pipeline: Pipeline | Iterable[TaskDef], taskFactory: TaskFactory | None = None) -> bool: """Check whether tasks in pipeline are correctly ordered. Pipeline is correctly ordered if for any DatasetType produced by a task @@ -123,7 +122,7 @@ def isPipelineOrdered( return True -def orderPipeline(pipeline: List[TaskDef]) -> List[TaskDef]: +def orderPipeline(pipeline: list[TaskDef]) -> list[TaskDef]: """Re-order tasks in pipeline to satisfy data dependencies. When possible new ordering keeps original relative order of the tasks. diff --git a/python/lsst/pipe/base/pipeline.py b/python/lsst/pipe/base/pipeline.py index bfe54c1f8..abc42e7dd 100644 --- a/python/lsst/pipe/base/pipeline.py +++ b/python/lsst/pipe/base/pipeline.py @@ -33,25 +33,10 @@ # ------------------------------- # Imports of standard modules -- # ------------------------------- +from collections.abc import Callable, Generator, Iterable, Iterator, Mapping, Set from dataclasses import dataclass from types import MappingProxyType -from typing import ( - TYPE_CHECKING, - AbstractSet, - Callable, - ClassVar, - Dict, - Generator, - Iterable, - Iterator, - Mapping, - Optional, - Set, - Tuple, - Type, - Union, - cast, -) +from typing import TYPE_CHECKING, ClassVar, cast # ----------------------------- # Imports for other modules -- @@ -104,9 +89,9 @@ class LabelSpecifier: and correct behavior is not guaranteed, or may vary from run to run. 
""" - labels: Optional[Set[str]] = None - begin: Optional[str] = None - end: Optional[str] = None + labels: set[str] | None = None + begin: str | None = None + end: str | None = None def __post_init__(self) -> None: if self.labels is not None and (self.begin or self.end): @@ -145,10 +130,10 @@ class TaskDef: def __init__( self, - taskName: Optional[str] = None, - config: Optional[PipelineTaskConfig] = None, - taskClass: Optional[Type[PipelineTask]] = None, - label: Optional[str] = None, + taskName: str | None = None, + config: PipelineTaskConfig | None = None, + taskClass: type[PipelineTask] | None = None, + label: str | None = None, ): if taskName is None: if taskClass is None: @@ -203,7 +188,7 @@ def makeMetadataDatasetName(cls, label: str) -> str: return acc.METADATA_OUTPUT_TEMPLATE.format(label=label) @property - def logOutputDatasetName(self) -> Optional[str]: + def logOutputDatasetName(self) -> str | None: """Name of a dataset type for log output from this task, `None` if logs are not to be saved (`str`) """ @@ -240,7 +225,7 @@ def _unreduce(cls, taskName: str, config: PipelineTaskConfig, label: str) -> Tas """ return cls(taskName=taskName, config=config, label=label) - def __reduce__(self) -> Tuple[Callable[[str, PipelineTaskConfig, str], TaskDef], Tuple[str, Config, str]]: + def __reduce__(self) -> tuple[Callable[[str, PipelineTaskConfig, str], TaskDef], tuple[str, Config, str]]: return (self._unreduce, (self.taskName, self.config, self.label)) @@ -404,7 +389,7 @@ def subsetFromLabels(self, labelSpecifier: LabelSpecifier) -> Pipeline: return Pipeline.fromIR(self._pipelineIR.subset_from_labels(labelSet)) @staticmethod - def _parse_file_specifier(uri: ResourcePathExpression) -> Tuple[ResourcePath, Optional[LabelSpecifier]]: + def _parse_file_specifier(uri: ResourcePathExpression) -> tuple[ResourcePath, LabelSpecifier | None]: """Split appart a uri and any possible label subsets""" if isinstance(uri, str): # This is to support legacy pipelines during transition @@ -420,10 +405,10 @@ def _parse_file_specifier(uri: ResourcePathExpression) -> Tuple[ResourcePath, Op uri = ResourcePath(uri) label_subset = uri.fragment or None - specifier: Optional[LabelSpecifier] + specifier: LabelSpecifier | None if label_subset is not None: label_subset = urllib.parse.unquote(label_subset) - args: Dict[str, Union[Set[str], str, None]] + args: dict[str, set[str] | str | None] # labels supplied as a list if "," in label_subset: if ".." in label_subset: @@ -591,7 +576,7 @@ def findSubsetsWithLabel(self, label: str) -> set[str]: results.add(subset.label) return results - def addInstrument(self, instrument: Union[Instrument, str]) -> None: + def addInstrument(self, instrument: Instrument | str) -> None: """Add an instrument to the pipeline, or replace an instrument that is already defined. @@ -610,7 +595,7 @@ def addInstrument(self, instrument: Union[Instrument, str]) -> None: instrument = get_full_type_name(instrument) self._pipelineIR.instrument = instrument - def getInstrument(self) -> Optional[str]: + def getInstrument(self) -> str | None: """Get the instrument from the pipeline. 
Returns @@ -643,7 +628,7 @@ def get_data_id(self, universe: DimensionUniverse) -> DataCoordinate: return DataCoordinate.standardize(instrument=instrument_class.getName(), universe=universe) return DataCoordinate.makeEmpty(universe) - def addTask(self, task: Union[Type[PipelineTask], str], label: str) -> None: + def addTask(self, task: type[PipelineTask] | str, label: str) -> None: """Add a new task to the pipeline, or replace a task that is already associated with the supplied label. @@ -793,7 +778,7 @@ def toExpandedPipeline(self) -> Generator[TaskDef, None, None]: def _buildTaskDef(self, label: str) -> TaskDef: if (taskIR := self._pipelineIR.tasks.get(label)) is None: raise NameError(f"Label {label} does not appear in this pipeline") - taskClass: Type[PipelineTask] = doImportType(taskIR.klass) + taskClass: type[PipelineTask] = doImportType(taskIR.klass) taskName = get_full_type_name(taskClass) config = taskClass.ConfigClass() instrument: PipeBaseInstrument | None = None @@ -903,7 +888,7 @@ def fromTaskDef( *, registry: Registry, include_configs: bool = True, - storage_class_mapping: Optional[Mapping[str, str]] = None, + storage_class_mapping: Mapping[str, str] | None = None, ) -> TaskDatasetTypes: """Extract and classify the dataset types from a single `PipelineTask`. @@ -1213,7 +1198,7 @@ class PipelineDatasetTypes: @classmethod def fromPipeline( cls, - pipeline: Union[Pipeline, Iterable[TaskDef]], + pipeline: Pipeline | Iterable[TaskDef], *, registry: Registry, include_configs: bool = True, @@ -1268,7 +1253,7 @@ def fromPipeline( pipeline = list(pipeline) # collect all the output dataset types - typeStorageclassMap: Dict[str, str] = {} + typeStorageclassMap: dict[str, str] = {} for taskDef in pipeline: for outConnection in iterConnections(taskDef.connections, "outputs"): typeStorageclassMap[outConnection.name] = outConnection.storageClass @@ -1333,7 +1318,7 @@ def checkConsistency(a: NamedValueSet, b: NamedValueSet) -> None: checkConsistency(allInputs, intermediateComposites) checkConsistency(allOutputs, intermediateComposites) - def frozen(s: AbstractSet[DatasetType]) -> NamedValueSet[DatasetType]: + def frozen(s: Set[DatasetType]) -> NamedValueSet[DatasetType]: assert isinstance(s, NamedValueSet) s.freeze() return s @@ -1359,7 +1344,7 @@ def frozen(s: AbstractSet[DatasetType]) -> NamedValueSet[DatasetType]: @classmethod def initOutputNames( cls, - pipeline: Union[Pipeline, Iterable[TaskDef]], + pipeline: Pipeline | Iterable[TaskDef], *, include_configs: bool = True, include_packages: bool = True, diff --git a/python/lsst/pipe/base/pipelineTask.py b/python/lsst/pipe/base/pipelineTask.py index cfe66904a..f3aca363c 100644 --- a/python/lsst/pipe/base/pipelineTask.py +++ b/python/lsst/pipe/base/pipelineTask.py @@ -26,7 +26,7 @@ __all__ = ["PipelineTask"] # Classes in this module -from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Type, Union +from typing import TYPE_CHECKING, Any, ClassVar from .connections import InputQuantizedConnection, OutputQuantizedConnection from .task import Task @@ -93,15 +93,15 @@ class PipelineTask(Task): permitted to require this argument. 
""" - ConfigClass: ClassVar[Type[PipelineTaskConfig]] + ConfigClass: ClassVar[type[PipelineTaskConfig]] canMultiprocess: ClassVar[bool] = True def __init__( self, *, - config: Optional[PipelineTaskConfig] = None, - log: Optional[Union[logging.Logger, LsstLogAdapter]] = None, - initInputs: Optional[Dict[str, Any]] = None, + config: PipelineTaskConfig | None = None, + log: logging.Logger | LsstLogAdapter | None = None, + initInputs: dict[str, Any] | None = None, **kwargs: Any, ): super().__init__(config=config, log=log, **kwargs) diff --git a/python/lsst/pipe/base/script/register_instrument.py b/python/lsst/pipe/base/script/register_instrument.py index dae11b243..5642bd62f 100644 --- a/python/lsst/pipe/base/script/register_instrument.py +++ b/python/lsst/pipe/base/script/register_instrument.py @@ -21,13 +21,11 @@ __all__ = ["register_instrument"] -from typing import List - from lsst.daf.butler import Butler from lsst.pipe.base import Instrument -def register_instrument(repo: str, instrument: List[str], update: bool = False) -> None: +def register_instrument(repo: str, instrument: list[str], update: bool = False) -> None: """Add an instrument to the data repository. Parameters diff --git a/python/lsst/pipe/base/struct.py b/python/lsst/pipe/base/struct.py index 4b06a5b90..1bbf2dbef 100644 --- a/python/lsst/pipe/base/struct.py +++ b/python/lsst/pipe/base/struct.py @@ -25,7 +25,7 @@ __all__ = ["Struct"] from types import SimpleNamespace -from typing import Any, Dict +from typing import Any class Struct(SimpleNamespace): @@ -95,7 +95,7 @@ def __safeAdd(self, name: str, val: Any) -> None: raise RuntimeError(f"Item name {name!r} invalid; must not begin with __") setattr(self, name, val) - def getDict(self) -> Dict[str, Any]: + def getDict(self) -> dict[str, Any]: """Get a dictionary of fields in this struct. Returns diff --git a/python/lsst/pipe/base/task.py b/python/lsst/pipe/base/task.py index 26aecf3d8..a871b9d5f 100644 --- a/python/lsst/pipe/base/task.py +++ b/python/lsst/pipe/base/task.py @@ -27,19 +27,8 @@ import contextlib import logging import weakref -from typing import ( - TYPE_CHECKING, - Any, - Callable, - ClassVar, - Dict, - Iterator, - Optional, - Sequence, - Tuple, - Type, - Union, -) +from collections.abc import Callable, Iterator, Sequence +from typing import TYPE_CHECKING, Any, ClassVar import lsst.utils import lsst.utils.logging @@ -147,7 +136,7 @@ class Task: because it simplifies construction (e.g. for unit tests). 
""" - ConfigClass: ClassVar[Type[Config]] + ConfigClass: ClassVar[type[Config]] _DefaultName: ClassVar[str] _add_module_logger_prefix: bool = True @@ -156,14 +145,14 @@ class Task: def __init__( self, - config: Optional[Config] = None, + config: Config | None = None, *, - name: Optional[str] = None, - parentTask: Optional[Task] = None, - log: Optional[Union[logging.Logger, lsst.utils.logging.LsstLogAdapter]] = None, + name: str | None = None, + parentTask: Task | None = None, + log: logging.Logger | lsst.utils.logging.LsstLogAdapter | None = None, ): self.metadata = _TASK_METADATA_TYPE() - self.__parentTask: Optional[weakref.ReferenceType] + self.__parentTask: weakref.ReferenceType | None self.__parentTask = parentTask if parentTask is None else weakref.ref(parentTask) if parentTask is not None: @@ -173,7 +162,7 @@ def __init__( self._fullName = parentTask._computeFullName(name) if config is None: config = getattr(parentTask.config, name) - self._taskDict: Dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict + self._taskDict: dict[str, weakref.ReferenceType[Task]] = parentTask._taskDict loggerName = parentTask.log.getChild(name).name else: if name is None: @@ -212,7 +201,7 @@ def __init__( self._taskDict[self._fullName] = weakref.ref(self) @property - def _parentTask(self) -> Optional[Task]: + def _parentTask(self) -> Task | None: return self.__parentTask if self.__parentTask is None else self.__parentTask() def emptyMetadata(self) -> None: @@ -283,7 +272,7 @@ def getName(self) -> str: """ return self._name - def getTaskDict(self) -> Dict[str, weakref.ReferenceType[Task]]: + def getTaskDict(self) -> dict[str, weakref.ReferenceType[Task]]: """Get a dictionary of all tasks as a shallow copy. Returns @@ -406,7 +395,7 @@ def _computeFullName(self, name: str) -> str: @staticmethod def _unpickle_via_factory( - factory: Callable[..., Task], args: Sequence[Any], kwargs: Dict[str, Any] + factory: Callable[..., Task], args: Sequence[Any], kwargs: dict[str, Any] ) -> Task: """Unpickle something by calling a factory @@ -415,7 +404,7 @@ def _unpickle_via_factory( """ return factory(*args, **kwargs) - def _reduce_kwargs(self) -> Dict[str, Any]: + def _reduce_kwargs(self) -> dict[str, Any]: """Return a dict of the keyword arguments that should be used by `__reduce__`. @@ -435,9 +424,9 @@ class method to ensure that the standard parameters are included. 
def __reduce__( self, - ) -> Tuple[ - Callable[[Callable[..., Task], Sequence[Any], Dict[str, Any]], Task], - Tuple[Type[Task], Sequence[Any], Dict[str, Any]], + ) -> tuple[ + Callable[[Callable[..., Task], Sequence[Any], dict[str, Any]], Task], + tuple[type[Task], Sequence[Any], dict[str, Any]], ]: """Pickler.""" return self._unpickle_via_factory, (self.__class__, [], self._reduce_kwargs()) diff --git a/python/lsst/pipe/base/testUtils.py b/python/lsst/pipe/base/testUtils.py index 7b41fb652..fd535eaf4 100644 --- a/python/lsst/pipe/base/testUtils.py +++ b/python/lsst/pipe/base/testUtils.py @@ -35,7 +35,8 @@ import itertools import unittest.mock from collections import defaultdict -from typing import TYPE_CHECKING, AbstractSet, Any, Dict, Mapping, Optional, Sequence, Set, Union +from collections.abc import Mapping, Sequence, Set +from typing import TYPE_CHECKING, Any from lsst.daf.butler import ( Butler, @@ -64,7 +65,7 @@ def makeQuantum( task: PipelineTask, butler: Butler, dataId: DataId, - ioDataIds: Mapping[str, Union[DataId, Sequence[DataId]]], + ioDataIds: Mapping[str, DataId | Sequence[DataId]], ) -> Quantum: """Create a Quantum for a particular data ID(s). @@ -128,8 +129,8 @@ def makeQuantum( def _checkDimensionsMatch( universe: DimensionUniverse, - expected: Union[AbstractSet[str], AbstractSet[Dimension]], - actual: Union[AbstractSet[str], AbstractSet[Dimension]], + expected: Set[str] | Set[Dimension], + actual: Set[str] | Set[Dimension], ) -> None: """Test whether two sets of dimensions agree after conversions. @@ -151,9 +152,7 @@ def _checkDimensionsMatch( raise ValueError(f"Mismatch in dimensions; expected {expected} but got {actual}.") -def _simplify( - universe: DimensionUniverse, dimensions: Union[AbstractSet[str], AbstractSet[Dimension]] -) -> Set[str]: +def _simplify(universe: DimensionUniverse, dimensions: Set[str] | Set[Dimension]) -> set[str]: """Reduce a set of dimensions to a string-only form. Parameters @@ -169,7 +168,7 @@ def _simplify( A copy of ``dimensions`` reduced to string form, with all spatial dimensions simplified to ``skypix``. """ - simplified: Set[str] = set() + simplified: set[str] = set() for dimension in dimensions: # skypix not a real Dimension, handle it first if dimension == "skypix": @@ -184,7 +183,7 @@ def _simplify( return simplified -def _checkDataIdMultiplicity(name: str, dataIds: Union[DataId, Sequence[DataId]], multiple: bool) -> None: +def _checkDataIdMultiplicity(name: str, dataIds: DataId | Sequence[DataId], multiple: bool) -> None: """Test whether data IDs are scalars for scalar connections and sequences for multiple connections. @@ -211,7 +210,7 @@ def _checkDataIdMultiplicity(name: str, dataIds: Union[DataId, Sequence[DataId]] raise ValueError(f"Expected single data ID for {name}, got {dataIds}.") -def _normalizeDataIds(dataIds: Union[DataId, Sequence[DataId]]) -> Sequence[DataId]: +def _normalizeDataIds(dataIds: DataId | Sequence[DataId]) -> Sequence[DataId]: """Represent both single and multiple data IDs as a list. Parameters @@ -281,7 +280,7 @@ def _refFromConnection( def runTestQuantum( task: PipelineTask, butler: Butler, quantum: Quantum, mockRun: bool = True -) -> Optional[unittest.mock.Mock]: +) -> unittest.mock.Mock | None: """Run a PipelineTask on a Quantum. 
Parameters @@ -408,7 +407,7 @@ def assertValidInitOutput(task: PipelineTask) -> None: _assertAttributeMatchesConnection(task, name, connection) -def getInitInputs(butler: Butler, config: PipelineTaskConfig) -> Dict[str, Any]: +def getInitInputs(butler: Butler, config: PipelineTaskConfig) -> dict[str, Any]: """Return the initInputs object that would have been passed to a `~lsst.pipe.base.PipelineTask` constructor. diff --git a/python/lsst/pipe/base/tests/no_dimensions.py b/python/lsst/pipe/base/tests/no_dimensions.py index d63de4935..c11c7abc0 100644 --- a/python/lsst/pipe/base/tests/no_dimensions.py +++ b/python/lsst/pipe/base/tests/no_dimensions.py @@ -27,7 +27,7 @@ "NoDimensionsTestTask", ) -from typing import Dict, Union, cast +from typing import cast from lsst.pex.config import Field from lsst.pipe.base import ( @@ -75,7 +75,7 @@ class NoDimensionsTestTask(PipelineTask): # The completely flexible arguments to run aren't really valid inheritance; # the base class method exists just as a place to put a docstring, so we # tell mypy to ignore it. - def run(self, input: Union[TaskMetadata, Dict[str, int]]) -> Struct: # type: ignore + def run(self, input: TaskMetadata | dict[str, int]) -> Struct: # type: ignore """Run the task, adding the configured key-value pair to the input argument and returning it as the output. diff --git a/python/lsst/pipe/base/tests/simpleQGraph.py b/python/lsst/pipe/base/tests/simpleQGraph.py index c835fe011..8b50bf42a 100644 --- a/python/lsst/pipe/base/tests/simpleQGraph.py +++ b/python/lsst/pipe/base/tests/simpleQGraph.py @@ -28,7 +28,7 @@ import itertools import logging from collections.abc import Iterable, Mapping, MutableMapping -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union, cast +from typing import TYPE_CHECKING, Any, cast import lsst.daf.butler.tests as butlerTests import lsst.pex.config as pexConfig @@ -66,7 +66,7 @@ def __init__(self, *args: Any, **kwargs: Any): def getName() -> str: return "INSTRU" - def getRawFormatter(self, dataId: DataId) -> Type[Formatter]: + def getRawFormatter(self, dataId: DataId) -> type[Formatter]: return Formatter def register(self, registry: Registry, *, update: bool = False) -> None: @@ -124,7 +124,7 @@ class AddTask(PipelineTask): initout = numpy.array([999]) """InitOutputs for this task""" - taskFactory: Optional[AddTaskFactoryMock] = None + taskFactory: AddTaskFactoryMock | None = None """Factory that makes instances""" def run(self, input: int) -> Struct: # type: ignore @@ -163,7 +163,7 @@ def makeTask( return task -def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[TaskDef]]) -> None: +def registerDatasetTypes(registry: Registry, pipeline: Pipeline | Iterable[TaskDef]) -> None: """Register all dataset types used by tasks in a registry. Copied and modified from `PreExecInit.initializeDatasetTypes`. @@ -174,7 +174,7 @@ def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[ Registry instance. pipeline : `typing.Iterable` of `TaskDef` Iterable of TaskDef instances, likely the output of the method - toExpandedPipeline on a `~lsst.pipe.base.Pipeline` object + `Pipelines.toExpandedPipeline` on a `~lsst.pipe.base.Pipeline` object. 
""" for taskDef in pipeline: configDatasetType = DatasetType( @@ -201,12 +201,12 @@ def registerDatasetTypes(registry: Registry, pipeline: Union[Pipeline, Iterable[ registry.registerDatasetType(datasetType) -def makeSimplePipeline(nQuanta: int, instrument: Optional[str] = None) -> Pipeline: +def makeSimplePipeline(nQuanta: int, instrument: str | None = None) -> Pipeline: """Make a simple Pipeline for tests. This is called by `makeSimpleQGraph()` if no pipeline is passed to that function. It can also be used to customize the pipeline used by - ``makeSimpleQGraph`` function by calling this first and passing the result + `makeSimpleQGraph()` function by calling this first and passing the result to it. Parameters @@ -273,7 +273,7 @@ def makeSimpleButler( def populateButler( - pipeline: Pipeline, butler: Butler, datasetTypes: Dict[Optional[str], List[str]] | None = None + pipeline: Pipeline, butler: Butler, datasetTypes: dict[str | None, list[str]] | None = None ) -> None: """Populate data butler with data needed for test. @@ -350,21 +350,21 @@ def populateButler( def makeSimpleQGraph( nQuanta: int = 5, - pipeline: Optional[Pipeline] = None, - butler: Optional[Butler] = None, - root: Optional[str] = None, + pipeline: Pipeline | None = None, + butler: Butler | None = None, + root: str | None = None, callPopulateButler: bool = True, run: str = "test", - instrument: Optional[str] = None, + instrument: str | None = None, skipExistingIn: Any = None, inMemory: bool = True, userQuery: str = "", - datasetTypes: Optional[Dict[Optional[str], List[str]]] = None, + datasetTypes: dict[str | None, list[str]] | None = None, datasetQueryConstraint: DSQVariant = DSQVariant.ALL, makeDatastoreRecords: bool = False, - bind: Optional[Mapping[str, Any]] = None, - metadata: Optional[MutableMapping[str, Any]] = None, -) -> Tuple[Butler, QuantumGraph]: + bind: Mapping[str, Any] | None = None, + metadata: MutableMapping[str, Any] | None = None, +) -> tuple[Butler, QuantumGraph]: """Make simple `QuantumGraph` for tests. Makes simple one-task pipeline with AddTask, sets up in-memory registry From a774f2d0291590fe586d0cdaecc258fade6b1403 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 15:59:58 -0700 Subject: [PATCH 09/10] Add some ruff configuration switches There are still very many warnings from ruff but this configuration matches our docstyle and naming ignores. --- pyproject.toml | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 17bbfb9c9..29adada60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,3 +136,63 @@ convention = "numpy" # not fit on one line. # Do not require docstrings in __init__.py files (D104) add-ignore = ["D107", "D105", "D102", "D100", "D200", "D205", "D400", "D104"] + +# The rule used with Ruff configuration is to disable every lint that has +# legitimate exceptions that are not dodgy code, rather than cluttering code +# with noqa markers. This is therefore a reiatively relaxed configuration that +# errs on the side of disabling legitimate lints. 
+# +# Reference for settings: https://beta.ruff.rs/docs/settings/ +# Reference for rules: https://beta.ruff.rs/docs/rules/ +[tool.ruff] +exclude = [ + "docs/**", +] +line-length = 110 +ignore = [ + "ANN101", # self should not have a type annotation + "ANN102", # cls should not have a type annotation + "ANN401", # sometimes Any is the right type + "ARG001", # unused function arguments are often legitimate + "ARG002", # unused method arguments are often legitimate + "ARG005", # unused lambda arguments are often legitimate + "BLE001", # we want to catch and report Exception in background tasks + "C414", # nested sorted is how you sort by multiple keys with reverse + "COM812", # omitting trailing commas allows black autoreformatting + "D102", # sometimes we use docstring inheritence + "D104", # don't see the point of documenting every package + "D105", # our style doesn't require docstrings for magic methods + "D106", # Pydantic uses a nested Config class that doesn't warrant docs + "EM101", # justification (duplicate string in traceback) is silly + "EM102", # justification (duplicate string in traceback) is silly + "FBT003", # positional booleans are normal for Pydantic field defaults + "G004", # forbidding logging f-strings is appealing, but not our style + "RET505", # disagree that omitting else always makes code more readable + "PLR0913", # factory pattern uses constructors with many arguments + "PLR2004", # too aggressive about magic values + "S105", # good idea but too many false positives on non-passwords + "S106", # good idea but too many false positives on non-passwords + "SIM102", # sometimes the formatting of nested if statements is clearer + "SIM117", # sometimes nested with contexts are clearer + "TCH001", # we decided to not maintain separate TYPE_CHECKING blocks + "TCH002", # we decided to not maintain separate TYPE_CHECKING blocks + "TCH003", # we decided to not maintain separate TYPE_CHECKING blocks + "TID252", # if we're going to use relative imports, use them always + "TRY003", # good general advice but lint is way too aggressive + "N802", + "N803", + "N806", + "N812", + "N815", + "N816", + "D107", + "D105", + "D102", + "D100", + "D200", + "D205", + "D400", + "D104", +] +select = ["ALL"] +target-version = "py311" From bfed2695af137c7add1e7011c3f7d7ce37dc12f6 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 9 Jun 2023 18:29:39 -0700 Subject: [PATCH 10/10] Doc fixes following review --- python/lsst/pipe/base/connections.py | 10 ++++---- .../lsst/pipe/base/executionButlerBuilder.py | 4 ++-- python/lsst/pipe/base/graph/graph.py | 16 ++++++------- python/lsst/pipe/base/graphBuilder.py | 12 +++++----- python/lsst/pipe/base/pipeTools.py | 11 +++++---- python/lsst/pipe/base/pipeline.py | 4 ++-- python/lsst/pipe/base/pipelineIR.py | 23 ++++++++++--------- 7 files changed, 41 insertions(+), 39 deletions(-) diff --git a/python/lsst/pipe/base/connections.py b/python/lsst/pipe/base/connections.py index 6789b9210..e02d99e2d 100644 --- a/python/lsst/pipe/base/connections.py +++ b/python/lsst/pipe/base/connections.py @@ -60,16 +60,16 @@ class ScalarError(TypeError): class PipelineTaskConnectionDict(UserDict): - """A special dict class used by PipelineTaskConnectionMetaclass + """A special dict class used by `PipelineTaskConnectionMetaclass`. - This dict is used in PipelineTaskConnection class creation, as the - dictionary that is initially used as __dict__. 
It exists to - intercept connection fields declared in a PipelineTaskConnection, and + This dict is used in `PipelineTaskConnection` class creation, as the + dictionary that is initially used as ``__dict__``. It exists to + intercept connection fields declared in a `PipelineTaskConnection`, and what name is used to identify them. The names are then added to class level list according to the connection type of the class attribute. The names are also used as keys in a class level dictionary associated with the corresponding class attribute. This information is a duplicate of - what exists in __dict__, but provides a simple place to lookup and + what exists in ``__dict__``, but provides a simple place to lookup and iterate on only these variables. """ diff --git a/python/lsst/pipe/base/executionButlerBuilder.py b/python/lsst/pipe/base/executionButlerBuilder.py index 6c462a693..638dc4e53 100644 --- a/python/lsst/pipe/base/executionButlerBuilder.py +++ b/python/lsst/pipe/base/executionButlerBuilder.py @@ -48,8 +48,8 @@ def _validate_dataset_type( ---------- candidate : `lsst.daf.butler.DatasetType` The candidate dataset type. - previous : `dict` [Union[`str`, `~lsst.daf.butler.DatasetType``], \ - `~lsst.daf.butler.DatasetType``] + previous : `dict` [ `str` | `~lsst.daf.butler.DatasetType`, \ + `~lsst.daf.butler.DatasetType`] Previous dataset types found, indexed by name and also by dataset type. The latter provides a quick way of returning a previously checked dataset type. diff --git a/python/lsst/pipe/base/graph/graph.py b/python/lsst/pipe/base/graph/graph.py index fda548da6..9fb294a6c 100644 --- a/python/lsst/pipe/base/graph/graph.py +++ b/python/lsst/pipe/base/graph/graph.py @@ -437,14 +437,14 @@ def getQuantaForTask(self, taskDef: TaskDef) -> frozenset[Quantum]: Returns ------- - frozenset of `~lsst.daf.butler.Quantum` + quanta : `frozenset` of `~lsst.daf.butler.Quantum` The `set` of `~lsst.daf.butler.Quantum` that is associated with the specified `TaskDef`. """ return frozenset(node.quantum for node in self._taskToQuantumNode.get(taskDef, ())) def getNumberOfQuantaForTask(self, taskDef: TaskDef) -> int: - """Return all the number of `~lsst.daf.butler.Quantum` associated with + """Return the number of `~lsst.daf.butler.Quantum` associated with a `TaskDef`. Parameters @@ -517,7 +517,7 @@ def findTaskWithOutput(self, datasetTypeName: DatasetTypeName) -> TaskDef | None Returns ------- result : `TaskDef` or `None` - `TaskDef` that outputs `DatasetTypeName` as an output or None if + `TaskDef` that outputs `DatasetTypeName` as an output or `None` if none of the tasks produce this `DatasetTypeName`. Raises @@ -841,7 +841,7 @@ def initInputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: Returns ------- - refs : `list` [ `lsst.daf.butler.DatasetRef` ] or None + refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None` DatasetRef for the task InitInput, can be `None`. This can return either resolved or non-resolved reference. """ @@ -857,7 +857,7 @@ def initOutputRefs(self, taskDef: TaskDef) -> list[DatasetRef] | None: Returns ------- - refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or None + refs : `list` [ `~lsst.daf.butler.DatasetRef` ] or `None` DatasetRefs for the task InitOutput, can be `None`. This can return either resolved or non-resolved reference. Resolved reference will match Quantum's initInputs if this is an intermediate dataset type. @@ -900,14 +900,14 @@ def loadUri( ---------- uri : convertible to `~lsst.resources.ResourcePath` URI from where to load the graph. 
- universe : `~lsst.daf.butler.DimensionUniverse` optional + universe : `~lsst.daf.butler.DimensionUniverse`, optional `~lsst.daf.butler.DimensionUniverse` instance, not used by the method itself but needed to ensure that registry data structures are initialized. If `None` it is loaded from the `QuantumGraph` saved structure. If supplied, the `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. - nodes : iterable of `uuid.UUID` or None + nodes : iterable of `uuid.UUID` or `None` UUIDs that correspond to nodes in the graph. If specified, only these nodes will be loaded. Defaults to None, in which case all nodes will be loaded. @@ -1201,7 +1201,7 @@ def load( saved structure. If supplied, the `~lsst.daf.butler.DimensionUniverse` from the loaded `QuantumGraph` will be validated against the supplied argument for compatibility. - nodes : iterable of `uuid.UUID` or None + nodes : iterable of `uuid.UUID` or `None` UUIDs that correspond to nodes in the graph. If specified, only these nodes will be loaded. Defaults to None, in which case all nodes will be loaded. diff --git a/python/lsst/pipe/base/graphBuilder.py b/python/lsst/pipe/base/graphBuilder.py index 11b38dc76..1e130010b 100644 --- a/python/lsst/pipe/base/graphBuilder.py +++ b/python/lsst/pipe/base/graphBuilder.py @@ -266,7 +266,7 @@ def unpackSingleRefs(self, storage_classes: dict[str, str]) -> NamedKeyDict[Data Returns ------- - dictionary : `NamedKeyDict` + dictionary : `~lsst.daf.butler.NamedKeyDict` Dictionary mapping `~lsst.daf.butler.DatasetType` to `~lsst.daf.butler.DatasetRef`, with both `~lsst.daf.butler.DatasetType` instances and string names usable @@ -291,7 +291,7 @@ def unpackMultiRefs(self, storage_classes: dict[str, str]) -> NamedKeyDict[Datas Returns ------- - dictionary : `NamedKeyDict` + dictionary : `~lsst.daf.butler.NamedKeyDict` Dictionary mapping `~lsst.daf.butler.DatasetType` to `list` of `~lsst.daf.butler.DatasetRef`, with both `~lsst.daf.butler.DatasetType` instances and string names usable @@ -419,7 +419,7 @@ def makeQuantum(self, datastore_records: Mapping[str, DatastoreRecordData] | Non Parameters ---------- - datastore_records : `dict` [ `str`, \ + datastore_records : `~collections.abc.Mapping` [ `str`, \ `~lsst.daf.butler.DatastoreRecordData` ], optional If not `None` then fill datastore records in each generated Quantum using the records from this structure. @@ -784,13 +784,13 @@ def __repr__(self) -> str: defaultDatasetQueryConstraints: NamedValueSet[DatasetType] """Datasets that should be used as constraints in the initial query, - according to tasks (`NamedValueSet`). + according to tasks (`~lsst.daf.butler.NamedValueSet`). """ dimensions: DimensionGraph """All dimensions used by any regular input, intermediate, or output (not prerequisite) dataset; the set of dimension used in the "Big Join - Query" (`DimensionGraph`). + Query" (`~lsst.daf.butler.DimensionGraph`). This is required to be a superset of all task quantum dimensions. """ @@ -1393,7 +1393,7 @@ def makeQuantumGraph( ---------- registry : `lsst.daf.butler.Registry` Registry for the data repository; used for all data ID queries. - metadata : Optional Mapping of `str` to primitives + metadata : `~collections.abc.Mapping` of `str` to primitives, optional This is an optional parameter of extra data to carry with the graph. Entries in this mapping should be able to be serialized in JSON. 
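The ``metadata`` argument documented above is easiest to see in action through the ``makeSimpleQGraph`` test helper, whose updated signature appears in the simpleQGraph.py hunk earlier in this series. The sketch below is illustrative only and not part of the patch: the scratch repository root and the metadata keys are invented for the example, and it assumes an environment in which ``lsst.pipe.base`` and ``lsst.daf.butler`` are importable.

.. code-block:: python

    import tempfile

    from lsst.pipe.base.tests.simpleQGraph import makeSimpleQGraph

    # Build a small test graph in a throwaway repository and attach a
    # JSON-serializable mapping that is carried along with the graph.
    root = tempfile.mkdtemp()  # hypothetical scratch location
    butler, qgraph = makeSimpleQGraph(
        nQuanta=3,
        root=root,
        metadata={"comment": "smoke test", "ticket": "DM-00000"},  # placeholder values
    )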
diff --git a/python/lsst/pipe/base/pipeTools.py b/python/lsst/pipe/base/pipeTools.py index d15246ada..0a6f249f1 100644 --- a/python/lsst/pipe/base/pipeTools.py +++ b/python/lsst/pipe/base/pipeTools.py @@ -92,11 +92,12 @@ def isPipelineOrdered(pipeline: Pipeline | Iterable[TaskDef], taskFactory: TaskF Raises ------ - `ImportError` is raised when task class cannot be imported. - `DuplicateOutputError` is raised when there is more than one producer for a - dataset type. - `MissingTaskFactoryError` is raised when TaskFactory is needed but not - provided. + ImportError + Raised when task class cannot be imported. + DuplicateOutputError + Raised when there is more than one producer for a dataset type. + MissingTaskFactoryError + Raised when TaskFactory is needed but not provided. """ # Build a map of DatasetType name to producer's index in a pipeline producerIndex = {} diff --git a/python/lsst/pipe/base/pipeline.py b/python/lsst/pipe/base/pipeline.py index abc42e7dd..781defcca 100644 --- a/python/lsst/pipe/base/pipeline.py +++ b/python/lsst/pipe/base/pipeline.py @@ -735,7 +735,7 @@ def write_to_uri(self, uri: ResourcePathExpression) -> None: uri : convertible to `~lsst.resources.ResourcePath` URI to write to; may have any scheme with `~lsst.resources.ResourcePath` write support or no scheme for a - local file/directory. Should have a ``.yaml``. + local file/directory. Should have a ``.yaml`` extension. """ self._pipelineIR.write_to_uri(uri) @@ -932,7 +932,7 @@ def makeDatasetTypesSet( is_input: bool, freeze: bool = True, ) -> NamedValueSet[DatasetType]: - """Construct a set of true `~lsst.daf.butler.DatasetType` objects + """Construct a set of true `~lsst.daf.butler.DatasetType` objects. Parameters ---------- diff --git a/python/lsst/pipe/base/pipelineIR.py b/python/lsst/pipe/base/pipelineIR.py index 6056edb77..f302f1e25 100644 --- a/python/lsst/pipe/base/pipelineIR.py +++ b/python/lsst/pipe/base/pipelineIR.py @@ -188,7 +188,7 @@ def to_primitives(self) -> dict[str, list[str] | str]: class ParametersIR: """Intermediate representation of parameters that are global to a pipeline - These parameters are specified under a top level key named `parameters` + These parameters are specified under a top level key named ``parameters`` and are declared as a yaml mapping. These entries can then be used inside task configuration blocks to specify configuration values. They may not be used in the special ``file`` or ``python`` blocks. @@ -196,17 +196,18 @@ class ParametersIR: Examples -------- .. code-block:: yaml + parameters: - shared_value: 14 - tasks: - taskA: - class: modA - config: - field1: parameters.shared_value - taskB: - class: modB - config: - field2: parameters.shared_value + shared_value: 14 + tasks: + taskA: + class: modA + config: + field1: parameters.shared_value + taskB: + class: modB + config: + field2: parameters.shared_value """ mapping: MutableMapping[str, str]
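Seen as a whole, the annotation changes in this series follow a single pattern: the deprecated ``typing`` aliases (``Optional``, ``Union``, ``Dict``, ``List``, ``Tuple``, ``AbstractSet``) are replaced by PEP 604 unions, built-in generics, and ``collections.abc`` imports. Below is a minimal sketch of that target style, assuming Python 3.10 or newer (the ruff configuration above targets py311); the function and its names are invented for illustration and are not taken from the package.

.. code-block:: python

    from collections.abc import Iterable, Mapping


    def count_labels(labels: Iterable[str], weights: Mapping[str, int] | None = None) -> dict[str, int]:
        # Count each label, optionally weighting it; the logic is a placeholder and
        # the point is the annotation style (PEP 604 unions, built-in generics).
        counts: dict[str, int] = {}
        for label in labels:
            counts[label] = counts.get(label, 0) + (weights.get(label, 1) if weights else 1)
        return counts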