From c734367ae759f7718048ddb57f33978ea64c09aa Mon Sep 17 00:00:00 2001
From: Jim Bosch <jbosch@astro.princeton.edu>
Date: Fri, 5 Jul 2024 11:23:56 -0400
Subject: [PATCH] Restore previous pipetask run-qbb behavior.

It's not clear whether previous authors were aware that passing
clobberOutputs=True and skipExisting=True in CmdLineFwk was previously
doing nothing because SingleQuantumExecution executor shortcutted
checkExistingOutputs when there was only a limited butler.

But while it looks safe from a correctness standpoint to just drop
that shortcut and let those options work as intended, that could mean
a lot of new file existence checks in BPS jobs, so it's safer for this
ticket to focus on enabling clobbering and skip-existing for other
LimitedButlers while leaving run-qbb behavior strictly unchanged.
---
 python/lsst/ctrl/mpexec/cmdLineFwk.py            |  3 +--
 python/lsst/ctrl/mpexec/singleQuantumExecutor.py | 11 +++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/python/lsst/ctrl/mpexec/cmdLineFwk.py b/python/lsst/ctrl/mpexec/cmdLineFwk.py
index e5b959f4..c085e8a3 100644
--- a/python/lsst/ctrl/mpexec/cmdLineFwk.py
+++ b/python/lsst/ctrl/mpexec/cmdLineFwk.py
@@ -994,8 +994,7 @@ def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
             exitOnKnownError=args.fail_fast,
             limited_butler_factory=_butler_factory,
             resources=resources,
-            clobberOutputs=True,
-            skipExisting=True,
+            assumeNoExistingOutputs=True,
         )
 
         timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
diff --git a/python/lsst/ctrl/mpexec/singleQuantumExecutor.py b/python/lsst/ctrl/mpexec/singleQuantumExecutor.py
index 81b49844..3987dba2 100644
--- a/python/lsst/ctrl/mpexec/singleQuantumExecutor.py
+++ b/python/lsst/ctrl/mpexec/singleQuantumExecutor.py
@@ -123,6 +123,12 @@ class SingleQuantumExecutor(QuantumExecutor):
         Unlike ``skipExistingIn``, this works with limited butlers as well as
         full butlers.  Always set to `True` if ``skipExistingIn`` matches
         ``butler.run``.
+    assumeNoExistingOutputs : `bool`, optional
+        If `True`, assume preexisting outputs are impossible (e.g. because this
+        is known by higher-level code to be a new ``RUN`` collection), and do
+        not look for them.  This causes the ``skipExisting`` and
+        ``clobberOutputs`` options to be ignored, but unlike just setting both
+        of those to `False`, it also avoids all dataset existence checks.
     """
 
     def __init__(
@@ -136,6 +142,7 @@ def __init__(
         limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
         resources: ExecutionResources | None = None,
         skipExisting: bool = False,
+        assumeNoExistingOutputs: bool = False,
     ):
         self.butler = butler
         self.taskFactory = taskFactory
@@ -144,6 +151,7 @@ def __init__(
         self.exitOnKnownError = exitOnKnownError
         self.limited_butler_factory = limited_butler_factory
         self.resources = resources
+        self.assumeNoExistingOutputs = assumeNoExistingOutputs
 
         if self.butler is None:
             assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"
@@ -345,6 +353,9 @@ def checkExistingOutputs(
         """
         task_node = self._conform_task_def(task_node)
 
+        if self.assumeNoExistingOutputs:
+            return False
+
         if self.skipExisting:
             _LOG.debug(
                 "Checking existence of metadata from previous execution of label=%s dataId=%s.",