Skip to content

Commit

Permalink
Restore previous pipetask run-qbb behavior.
Browse files Browse the repository at this point in the history
It's not clear whether previous authors were aware that passing
clobberOutputs=True and skipExisting=True in CmdLineFwk previously
did nothing, because SingleQuantumExecutor short-circuited
checkExistingOutputs when there was only a limited butler.

But while it looks safe from a correctness standpoint to just drop
that shortcut and let those options work as intended, that could mean
a lot of new file existence checks in BPS jobs, so it's safer for this
ticket to focus on enabling clobbering and skip-existing for other
LimitedButlers while leaving run-qbb behavior strictly unchanged.
  • Loading branch information
TallJimbo authored and mfisherlevine committed Jul 26, 2024
1 parent b03bb6c commit c734367
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
3 changes: 1 addition & 2 deletions python/lsst/ctrl/mpexec/cmdLineFwk.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,8 +994,7 @@ def runGraphQBB(self, task_factory: TaskFactory, args: SimpleNamespace) -> None:
exitOnKnownError=args.fail_fast,
limited_butler_factory=_butler_factory,
resources=resources,
clobberOutputs=True,
skipExisting=True,
assumeNoExistingOutputs=True,
)

timeout = self.MP_TIMEOUT if args.timeout is None else args.timeout
Expand Down
11 changes: 11 additions & 0 deletions python/lsst/ctrl/mpexec/singleQuantumExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ class SingleQuantumExecutor(QuantumExecutor):
Unlike ``skipExistingIn``, this works with limited butlers as well as
full butlers. Always set to `True` if ``skipExistingIn`` matches
``butler.run``.
assumeNoExistingOutputs : `bool`, optional
If `True`, assume preexisting outputs are impossible (e.g. because this
is known by higher-level code to be a new ``RUN`` collection), and do
not look for them. This causes the ``skipExisting`` and
``clobberOutputs`` options to be ignored, but unlike just setting both
of those to `False`, it also avoids all dataset existence checks.
"""

def __init__(
Expand All @@ -136,6 +142,7 @@ def __init__(
limited_butler_factory: Callable[[Quantum], LimitedButler] | None = None,
resources: ExecutionResources | None = None,
skipExisting: bool = False,
assumeNoExistingOutputs: bool = False,
):
self.butler = butler
self.taskFactory = taskFactory
Expand All @@ -144,6 +151,7 @@ def __init__(
self.exitOnKnownError = exitOnKnownError
self.limited_butler_factory = limited_butler_factory
self.resources = resources
self.assumeNoExistingOutputs = assumeNoExistingOutputs

if self.butler is None:
assert limited_butler_factory is not None, "limited_butler_factory is needed when butler is None"
Expand Down Expand Up @@ -345,6 +353,9 @@ def checkExistingOutputs(
"""
task_node = self._conform_task_def(task_node)

if self.assumeNoExistingOutputs:
return False

if self.skipExisting:
_LOG.debug(
"Checking existence of metadata from previous execution of label=%s dataId=%s.",
Expand Down

0 comments on commit c734367

Please sign in to comment.