Restore previous pipetask run-qbb behavior.

It's not clear whether previous authors were aware that passing clobberOutputs=True and skipExisting=True in CmdLineFwk was previously doing nothing because SingleQuantumExecution executor shortcutted checkExistingOutputs when there was only a limited butler. But while it looks safe from a correctness standpoint to just drop that shortcut and let those options work as intended, that could mean a lot of new file existence checks in BPS jobs, so it's safer for this ticket to focus on enabling clobbering and skip-existing for other LimitedButlers while leaving run-qbb behavior strictly unchanged.
lsst · Jul 26, 2024 · c734367 · c734367
1 parent b03bb6c
commit c734367
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 2 deletions.
diff --git a/python/lsst/ctrl/mpexec/cmdLineFwk.py b/python/lsst/ctrl/mpexec/cmdLineFwk.py
@@ -994,8 +994,7 @@ def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
             exitOnKnownError=args.fail_fast,
             limited_butler_factory=_butler_factory,
             resources=resources,
-            clobberOutputs=True,
-            skipExisting=True,
+            assumeNoExistingOutputs=True,
         )
 
         timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout

diff --git a/python/lsst/ctrl/mpexec/singleQuantumExecutor.py b/python/lsst/ctrl/mpexec/singleQuantumExecutor.py
@@ -123,6 +123,12 @@ class SingleQuantumExecutor(QuantumExecutor):
         Unlike ``skipExistingIn``, this works with limited butlers as well as
         full butlers.  Always set to `True` if ``skipExistingIn`` matches
         ``butler.run``.
+    assumeNoExistingOutputs : `bool`, optional
+        If `True`, assume preexisting outputs are impossible (e.g. because this
+        is known by higher-level code to be a new ``RUN`` collection), and do
+        not look for them.  This causes the ``skipExisting`` and
+        ``clobberOutputs`` options to be ignored, but unlike just setting both
+        of those to `False`, it also avoids all dataset existence checks.
     """
 
     def __init__(
@@ -136,6 +142,7 @@ def __init__(
         limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
         resources: ExecutionResources | None = None,
         skipExisting: bool = False,
+        assumeNoExistingOutputs: bool = False,
     ):
         self.butler = butler
         self.taskFactory = taskFactory
@@ -144,6 +151,7 @@ def __init__(
         self.exitOnKnownError = exitOnKnownError
         self.limited_butler_factory = limited_butler_factory
         self.resources = resources
+        self.assumeNoExistingOutputs = assumeNoExistingOutputs
 
         if self.butler is None:
             assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"
@@ -345,6 +353,9 @@ def checkExistingOutputs(
         """
         task_node = self._conform_task_def(task_node)
 
+        if self.assumeNoExistingOutputs:
+            return False
+
         if self.skipExisting:
             _LOG.debug(
                 "Checking existence of metadata from previous execution of label=%s dataId=%s.",