From a470391786176dab785dd8904a4deb88e9eac7e6 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 8 Jun 2023 15:29:45 -0400 Subject: [PATCH 01/20] Add a node for doing a bunch of decls in a row which are the only thing you can do a bunch of in a row. --- src/toil/wdl/wdltoil.py | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 651618598a..3f3a390921 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -1364,6 +1364,49 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: else: raise WDL.Error.InvalidType(self._node, "Unimplemented WorkflowNode: " + str(type(self._node))) +class WDLWorkflowNodeListJob(WDLBaseJob): + """ + Job that evaluates a list of WDL workflow nodes, which are in the same + scope and in a topological dependency order, and which do not call out to any other + workflows or tasks or sections. + """ + + def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, **kwargs: Any) -> None: + """ + Make a new job to run a list of workflow nodes to completion. + """ + super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, **kwargs) + + self._nodes = nodes + self._prev_node_results = prev_node_results + self._namespace = namespace + + for n in self._nodes: + if isinstance(n, (WDL.Tree.Call, WDL.Tree.Scatter, WDL.Tree.Conditional)): + raise RuntimeError("Node cannot be evaluated with other nodes: " + str(n)) + + def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: + """ + Actually execute the workflow nodes. + """ + + # Combine the bindings we get from previous jobs + current_bindings = combine_bindings(unwrap_all(self._prev_node_results)) + # Set up the WDL standard library + standard_library = ToilWDLStdLibBase(file_store) + + for node in self._nodes: + if isinstance(node, WDL.Tree.Decl): + # This is a variable assignment + logger.info('Setting %s to %s', node.name, node.expr) + value = evaluate_decl(node, current_bindings, standard_library) + current_bindings = current_bindings.bind(node.name, value) + else: + raise WDL.Error.InvalidType(self._node, "Unimplemented WorkflowNode: " + str(type(node))) + + return current_bindings + + class WDLCombineBindingsJob(WDLBaseJob): """ Job that collects the results from WDL workflow nodes and combines their From aba6a9dbdbf9290d318f7e01d9a9736df4c6b3dd Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 8 Jun 2023 16:35:47 -0400 Subject: [PATCH 02/20] Remove custom topological sort for WDL in favor of Graphlib --- requirements-wdl.txt | 1 + src/toil/wdl/wdltoil.py | 199 ++++++++++++++++++++++------------------ 2 files changed, 112 insertions(+), 88 deletions(-) diff --git a/requirements-wdl.txt b/requirements-wdl.txt index 3bd0ff1d10..9cbc802d1b 100644 --- a/requirements-wdl.txt +++ b/requirements-wdl.txt @@ -1,2 +1,3 @@ miniwdl==1.9.1 wdlparse==0.1.0 +graphlib-backport==1.0 ; python_version < '3.9' diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 3f3a390921..4bf77d580b 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -32,6 +32,7 @@ import uuid from contextlib import ExitStack +from graphlib import TopologicalSorter from typing import cast, Any, Callable, Union, Dict, List, Optional, Set, Sequence, Tuple, Type, TypeVar, Iterator from urllib.parse import urlsplit, urljoin, quote, unquote @@ -1375,7 +1376,7 @@ def __init__(self, nodes: 
List[WDL.Tree.WorkflowNode], prev_node_results: Sequen """ Make a new job to run a list of workflow nodes to completion. """ - super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, **kwargs) + super().__init__(unitName=nodes[0].workflow_node_id + '+', displayName=nodes[0].workflow_node_id + '+', **kwargs) self._nodes = nodes self._prev_node_results = prev_node_results @@ -1402,7 +1403,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: value = evaluate_decl(node, current_bindings, standard_library) current_bindings = current_bindings.bind(node.name, value) else: - raise WDL.Error.InvalidType(self._node, "Unimplemented WorkflowNode: " + str(type(node))) + raise WDL.Error.InvalidType(node, "Unimplemented WorkflowNode: " + str(type(node))) return current_bindings @@ -1462,6 +1463,102 @@ def run(self, file_store: AbstractFileStore) -> WDLBindings: super().run(file_store) return combine_bindings(unwrap_all(self._prev_node_results)).wrap_namespace(self._namespace) +class WDLWorkflowGraph: + """ + Represents a graph of WDL WorkflowNodes. + + Operates at a certain level of instantiation (i.e. sub-sections are + represented by single nodes). + + Assumes all relevant nodes are provided; dependencies outside the provided + nodes are assumed to be satisfied already. + """ + + def __init__(self, nodes: Sequence[WDL.Tree.WorkflowNode]) -> None: + """ + Make a graph for analyzing a set of workflow nodes. + """ + + # For Gather nodes, the Toil interpreter handles them as part of their + # associated section. So make a map from gather ID to the section node + # ID. + self._gather_to_section: Dict[str, str] = {} + for node in nodes: + if isinstance(node, WDL.Tree.WorkflowSection): + for gather_node in node.gathers.values(): + self._gather_to_section[gather_node.workflow_node_id] = node.workflow_node_id + + # Store all the nodes by ID, except the gathers which we elide. + self._nodes: Dict[str, WDL.Tree.WorkflowNode] = {node.workflow_node_id: node for node in nodes if not isinstance(node, WDL.Tree.Gather)} + + def real_id(self, node_id: str) -> str: + """ + Map multiple IDs for what we consider the same node to one ID. + + This elides/resolves gathers. + """ + return self._gather_to_section.get(node_id, node_id) + + def get(self, node_id: str) -> WDL.Tree.WorkflowNode: + """ + Get a node by ID. + """ + return self._nodes[self.real_id(node_id)] + + def get_dependencies(self, node_id: str) -> List[str]: + """ + Get all the nodes that a node depends on. + + Produces dependencies after resolving gathers and internal-to-section + dependencies, on nodes that are also in this graph. + """ + + # We need to make sure to bubble up dependencies from inside sections. + # A conditional might only appear to depend on the variables in the + # conditional expression, but its body can depend on other stuff, and + # we need to make sure that that stuff has finished and updated the + # environment before the conditional body runs. TODO: This is because + # Toil can't go and get and add successors to the relevant jobs later, + # while MiniWDL's engine apparently can. This ends up reducing + # parallelism more than would strictly be necessary; nothing in the + # conditional can start until the dependencies of everything in the + # conditional are ready. 
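# --- Illustrative aside, not part of the patch (node IDs are made up) ---
# What real_id() does for the gather elision described above, in effect:
#
#     graph = WDLWorkflowGraph(nodes)      # nodes include a scatter section
#     graph.real_id("gather-g")            # a gather owned by that section
#     # -> "scatter-s"                     # ...maps to the section's own ID
#     graph.get_dependencies("some-decl")  # so a dependency on the gather
#     # -> {"scatter-s"}                   # points at the section's Toil job
#
# Gather nodes themselves never get Toil jobs of their own here.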
+ + dependencies = set() + + node = self.get(node_id) + for dependency in recursive_dependencies(node): + real_dependency = self.real_id(dependency) + if real_dependency in self._nodes: + dependencies.add(real_dependency) + + return list(dependencies) + + def topological_order(self) -> List[str]: + """ + Get a topological order of the nodes, based on their dependencies. + """ + + sorter : TopologicalSorter[str] = TopologicalSorter() + for node_id in self._nodes.keys(): + for dependency in self.get_dependencies(node_id): + # Add all the edges + sorter.add(node_id, dependency) + return list(sorter.static_order()) + + def leaves(self) -> List[str]: + """ + Get all the workflow node IDs that have no dependents in the graph. + """ + + leaves = set(self._nodes.keys()) + for node_id in self._nodes.keys(): + for dependency in self.get_dependencies(node_id): + # Mark everything depended on as not a leaf + leaves.remove(dependency) + return list(leaves) + + class WDLSectionJob(WDLBaseJob): """ Job that can create more graph for a section of the wrokflow. @@ -1493,95 +1590,31 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: at the end of the section. """ - # We need to track the dependency universe; some of our child nodes may - # depend on nodes that are e.g. inputs to the workflow that encloses - # the section that encloses this section, and we need to just assume - # those are already available, even though we don't have access to the - # complete list. So we make a set of everything we actually do need to - # care about resolving, instead. - dependabes: Set[str] = set() - if local_environment is not None: # Bring local environment into scope environment = combine_bindings([environment, local_environment]) - # What nodes exist, under their IDs? - wdl_id_to_wdl_node: Dict[str, WDL.Tree.WorkflowNode] = {node.workflow_node_id: node for node in nodes if isinstance(node, WDL.Tree.WorkflowNode)} - dependabes |= set(wdl_id_to_wdl_node.keys()) - - # That doesn't include gather nodes, which in the Toil interpreter we - # handle as part of their enclosing section, without individual Toil - # jobs for each. So make a map from gather ID to the section node ID. - gather_to_section: Dict[str, str] = {} - for node in nodes: - if isinstance(node, WDL.Tree.WorkflowSection): - for gather_node in node.gathers.values(): - gather_to_section[gather_node.workflow_node_id] = node.workflow_node_id - dependabes |= set(gather_to_section.keys()) + # Make a graph of all the nodes at this level + section_graph = WDLWorkflowGraph(nodes) # To make Toil jobs, we need all the jobs they depend on made so we can # call .rv(). So we need to solve the workflow DAG ourselves to set it up # properly. + + wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {} - # We also need to make sure to bubble up dependencies from inside - # sections. A conditional might only appear to depend on the variables - # in the conditional expression, but its body can depend on other - # stuff, and we need to make sure that that stuff has finished and - # updated the environment before the conditional body runs. TODO: This - # is because Toil can't go and get and add successors to the relevant - # jobs later, while MiniWDL's engine apparently can. This ends up - # reducing parallelism more than would strictly be necessary; nothing - # in the conditional can start until the dependencies of everything in - # the conditional are ready. - - # What are the dependencies of all the body nodes on other body nodes? 
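# --- Illustrative sketch, not part of the patch ---
# Standalone example of the graphlib API that topological_order() relies on:
# TopologicalSorter.add(node, *predecessors) records edges, and
# static_order() yields each node only after all of its dependencies, which
# is the order create_subgraph() needs so that a job's .rv() promises exist
# before anything that consumes them is constructed. (Node IDs here are made
# up for illustration.)
from graphlib import TopologicalSorter

sorter = TopologicalSorter()
sorter.add("decl-x", "call-a")            # decl-x depends on call-a
sorter.add("call-b", "call-a", "decl-x")  # call-b depends on both
print(list(sorter.static_order()))        # ['call-a', 'decl-x', 'call-b']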
- # Nodes can depend on other nodes actually in the tree, or on gathers - # that belong to other nodes, but we rewrite the gather dependencies - # through to the enclosing section node. Skip any dependencies on - # anything not provided by another body node (such as on an input, or - # something outside of the current section). TODO: This will need to - # change if we let parallelism transcend sections. - wdl_id_to_dependency_ids = {node_id: list({gather_to_section[dep] if dep in gather_to_section else dep for dep in recursive_dependencies(node) if dep in dependabes}) for node_id, node in wdl_id_to_wdl_node.items()} - - # Which of those are outstanding? - wdl_id_to_outstanding_dependency_ids = copy.deepcopy(wdl_id_to_dependency_ids) - - # What nodes depend on each node? - wdl_id_to_dependent_ids: Dict[str, Set[str]] = collections.defaultdict(set) - for node_id, dependencies in wdl_id_to_dependency_ids.items(): - for dependency_id in dependencies: - # Invert the dependency edges - wdl_id_to_dependent_ids[dependency_id].add(node_id) - - # This will hold all the Toil jobs by WDL node ID - wdl_id_to_toil_job: Dict[str, Job] = {} - - # And collect IDs of jobs with no successors to add a final sink job - leaf_ids: Set[str] = set() - - # What nodes are ready? - ready_node_ids = {node_id for node_id, dependencies in wdl_id_to_outstanding_dependency_ids.items() if len(dependencies) == 0} - - while len(wdl_id_to_outstanding_dependency_ids) > 0: - logger.debug('Ready nodes: %s', ready_node_ids) - logger.debug('Waiting nodes: %s', wdl_id_to_outstanding_dependency_ids) - - # Find a node that we can do now - node_id = next(iter(ready_node_ids)) - - # Say we are doing it - ready_node_ids.remove(node_id) - del wdl_id_to_outstanding_dependency_ids[node_id] + for node_id in section_graph.topological_order(): + # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. + prev_jobs = [wdl_id_to_toil_job[prev_node_id] for prev_node_id in section_graph.get_dependencies(node_id)] + logger.debug('Make Toil job for %s', node_id) - # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. - prev_jobs = [wdl_id_to_toil_job[prev_node_id] for prev_node_id in wdl_id_to_dependency_ids[node_id] if prev_node_id in wdl_id_to_toil_job] rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs] # We also need access to section-level bindings like inputs rvs.append(environment) # Use them to make a new job - job = WDLWorkflowNodeJob(wdl_id_to_wdl_node[node_id], rvs, self._namespace) + job = WDLWorkflowNodeJob(section_graph.get(node_id), rvs, self._namespace) for prev_job in prev_jobs: # Connect up the happens-after relationships to make sure the # return values are available. @@ -1595,19 +1628,9 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: # Save the job wdl_id_to_toil_job[node_id] = job - - if len(wdl_id_to_dependent_ids[node_id]) == 0: - # Nothing comes after this job, so connect it to sink - leaf_ids.add(node_id) - else: - for dependent_id in wdl_id_to_dependent_ids[node_id]: - # For each job that waits on this job - wdl_id_to_outstanding_dependency_ids[dependent_id].remove(node_id) - logger.debug('Dependent %s no longer needs to wait on %s', dependent_id, node_id) - if len(wdl_id_to_outstanding_dependency_ids[dependent_id]) == 0: - # We were the last thing blocking them. 
- ready_node_ids.add(dependent_id) - logger.debug('Dependent %s is now ready', dependent_id) + + # Find all the leaves + leaf_ids = section_graph.leaves() # Make the sink job leaf_rvs: List[Union[WDLBindings, Promise]] = [wdl_id_to_toil_job[node_id].rv() for node_id in leaf_ids] From 2f629db83e56fffeb593777751c3a796863b8532 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 8 Jun 2023 16:57:09 -0400 Subject: [PATCH 03/20] Get test_miniwdl_self_test passing on mac with DOCKER_HOST set --- src/toil/test/__init__.py | 10 ++++++++++ src/toil/test/wdl/wdltoil_test.py | 16 ++++++++-------- src/toil/wdl/wdltoil.py | 12 +++++++----- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/toil/test/__init__.py b/src/toil/test/__init__.py index 6f51bb724c..53f4e4fae5 100644 --- a/src/toil/test/__init__.py +++ b/src/toil/test/__init__.py @@ -582,6 +582,16 @@ def needs_singularity(test_item: MT) -> MT: return test_item else: return unittest.skip("Install singularity to include this test.")(test_item) + +def needs_singularity_or_docker(test_item: MT) -> MT: + """ + Use as a decorator before test classes or methods to only run them if + singularity OR Docker is installed. + """ + if which('singularity'): + return _mark_test('singularity', test_item) + else: + return needs_docker(test_item) def needs_local_cuda(test_item: MT) -> MT: """ diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index 050c6ef207..e7ea736164 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -9,7 +9,7 @@ import pytest -from toil.test import ToilTest, needs_docker, needs_docker_cuda, needs_java, needs_singularity, slow +from toil.test import ToilTest, needs_docker_cuda, needs_java, needs_singularity_or_docker, slow from toil.version import exactPython # Don't import the test case directly or pytest will test it again. import toil.test.wdl.toilwdlTest @@ -24,9 +24,9 @@ def setUpClass(cls) -> None: """Runs once for all tests.""" cls.base_command = [exactPython, '-m', 'toil.wdl.wdltoil'] - # We inherit a testMD5sum but it is going to need Singularity and not - # Docker now. And also needs to have a WDL 1.0+ WDL file. So we replace it. - @needs_singularity + # We inherit a testMD5sum but it is going to need Singularity or Docker + # now. And also needs to have a WDL 1.0+ WDL file. So we replace it. + @needs_singularity_or_docker def testMD5sum(self): """Test if toilwdl produces the same outputs as known good outputs for WDL's GATK tutorial #1.""" @@ -53,13 +53,13 @@ def test_empty_file_path(self): assert retval != 0 assert b'Could not find' in stderr - @needs_singularity + @needs_singularity_or_docker def test_miniwdl_self_test(self): """Test if the MiniWDL self test runs and produces the expected output.""" wdl_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/self_test.wdl') json_file = os.path.abspath('src/toil/test/wdl/miniwdl_self_test/inputs.json') - result_json = subprocess.check_output(self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl']) + result_json = subprocess.check_output(self.base_command + [wdl_file, json_file, '--logDebug', '-o', self.output_dir, '--outputDialect', 'miniwdl']) result = json.loads(result_json) # Expect MiniWDL-style output with a designated "dir" @@ -85,7 +85,7 @@ def test_miniwdl_self_test(self): @slow @needs_docker_cuda - @needs_singularity + @needs_singularity_or_docker def test_giraffe_deepvariant(self): """Test if Giraffe and CPU DeepVariant run. 
This could take 25 minutes.""" # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't. @@ -128,7 +128,7 @@ def test_giraffe_deepvariant(self): assert os.path.exists(outputs['GiraffeDeepVariant.output_vcf']) @slow - @needs_singularity + @needs_singularity_or_docker def test_giraffe(self): """Test if Giraffe runs. This could take 12 minutes. Also we scale it down.""" # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't. diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 4bf77d580b..e36d043e9e 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -264,7 +264,6 @@ def for_each_node(root: WDL.Tree.WorkflowNode) -> Iterator[WDL.Tree.WorkflowNode internal nodes of conditionals and scatters, and gather nodes. """ - logger.debug('WorkflowNode: %s: %s %s', type(root), root, root.workflow_node_id) yield root for child_node in root.children: if isinstance(child_node, WDL.Tree.WorkflowNode): @@ -1541,9 +1540,8 @@ def topological_order(self) -> List[str]: sorter : TopologicalSorter[str] = TopologicalSorter() for node_id in self._nodes.keys(): - for dependency in self.get_dependencies(node_id): - # Add all the edges - sorter.add(node_id, dependency) + # Add all the edges + sorter.add(node_id, *self.get_dependencies(node_id)) return list(sorter.static_order()) def leaves(self) -> List[str]: @@ -1603,7 +1601,11 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {} - for node_id in section_graph.topological_order(): + creation_order = section_graph.topological_order() + + logger.debug('Creation order: %s', creation_order) + + for node_id in creation_order: # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. prev_jobs = [wdl_id_to_toil_job[prev_node_id] for prev_node_id in section_graph.get_dependencies(node_id)] From a8b6869adce6f1f743396ad9678534142e173b01 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 8 Jun 2023 17:27:44 -0400 Subject: [PATCH 04/20] Fix detection of nonexistent inputs --- src/toil/common.py | 2 +- src/toil/jobStores/abstractJobStore.py | 52 ++++++++++++++++---------- src/toil/jobStores/aws/jobStore.py | 2 +- src/toil/test/__init__.py | 4 +- src/toil/test/wdl/wdltoil_test.py | 2 +- src/toil/wdl/wdltoil.py | 17 ++++++--- 6 files changed, 49 insertions(+), 30 deletions(-) diff --git a/src/toil/common.py b/src/toil/common.py index b94aab56d3..ef9f63c6e3 100644 --- a/src/toil/common.py +++ b/src/toil/common.py @@ -239,7 +239,7 @@ def prepare_restart(self) -> None: # auto-generated and point to a temp directory that could no longer # exist and that can't safely be re-made. 
self.write_messages = None - + def setOptions(self, options: Namespace) -> None: """Creates a config object from the options object.""" diff --git a/src/toil/jobStores/abstractJobStore.py b/src/toil/jobStores/abstractJobStore.py index f6f848a5ae..446d03a5c3 100644 --- a/src/toil/jobStores/abstractJobStore.py +++ b/src/toil/jobStores/abstractJobStore.py @@ -43,11 +43,10 @@ from typing_extensions import Literal from urllib.parse import ParseResult, urlparse +from urllib.error import HTTPError from urllib.request import urlopen from uuid import uuid4 -from requests.exceptions import HTTPError - from toil.common import Config, getNodeID, safeUnpickleFromStream from toil.fileStores import FileID from toil.job import (CheckpointJobDescription, @@ -420,6 +419,8 @@ def import_file(self, - 'gs' e.g. gs://bucket/file + Raises FileNotFoundError if the file does not exist. + :param str src_uri: URL that points to a file or object in the storage mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket. It must be a file, not a directory or prefix. @@ -453,6 +454,8 @@ def _import_file(self, asks the other job store class for a stream and writes that stream as either a regular or a shared file. + Raises FileNotFoundError if the file does not exist. + :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports reading from the given URL and getting the file size from the URL. @@ -620,6 +623,8 @@ def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> Tuple[int, boo Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes. + Raises FileNotFoundError if the thing at the URL is not found. + :param ParseResult url: URL that points to a file or object in the storage mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket. @@ -1704,23 +1709,32 @@ def get_size(cls, url: ParseResult) -> Optional[int]: def _read_from_url( cls, url: ParseResult, writable: Union[IO[bytes], IO[str]] ) -> Tuple[int, bool]: - # We can only retry on errors that happen as responses to the request. - # If we start getting file data, and the connection drops, we fail. - # So we don't have to worry about writing the start of the file twice. - with closing(urlopen(url.geturl())) as readable: - # Make something to count the bytes we get - # We need to put the actual count in a container so our - # nested function can modify it without creating its own - # local with the same name. - size = [0] - def count(l: int) -> None: - size[0] += l - counter = WriteWatchingStream(writable) - counter.onWrite(count) - - # Do the download - shutil.copyfileobj(readable, counter) - return size[0], False + + try: + # We can only retry on errors that happen as responses to the request. + # If we start getting file data, and the connection drops, we fail. + # So we don't have to worry about writing the start of the file twice. + with closing(urlopen(url.geturl())) as readable: + # Make something to count the bytes we get + # We need to put the actual count in a container so our + # nested function can modify it without creating its own + # local with the same name. 
+ size = [0] + def count(l: int) -> None: + size[0] += l + counter = WriteWatchingStream(writable) + counter.onWrite(count) + + # Do the download + shutil.copyfileobj(readable, counter) + return size[0], False + except HTTPError as e: + if e.code == 404: + # Translate into a FileNotFoundError for detecting + # un-importable files + raise FileNotFoundError from e + else: + raise @classmethod def _get_is_directory(cls, url: ParseResult) -> bool: diff --git a/src/toil/jobStores/aws/jobStore.py b/src/toil/jobStores/aws/jobStore.py index 4b57f2c612..b468c85d18 100644 --- a/src/toil/jobStores/aws/jobStore.py +++ b/src/toil/jobStores/aws/jobStore.py @@ -757,7 +757,7 @@ def bucket_retry_predicate(error): bucket_tagging.put(Tagging={'TagSet': flat_tags}) # Configure bucket so that we can make objects in - # it public, which was the historical default. + # it public, which was the historical default. enable_public_objects(bucket_name) elif block: raise diff --git a/src/toil/test/__init__.py b/src/toil/test/__init__.py index 53f4e4fae5..539dc00e31 100644 --- a/src/toil/test/__init__.py +++ b/src/toil/test/__init__.py @@ -571,7 +571,7 @@ def needs_docker(test_item: MT) -> MT: return test_item else: return unittest.skip("Install docker to include this test.")(test_item) - + def needs_singularity(test_item: MT) -> MT: """ Use as a decorator before test classes or methods to only run them if @@ -592,7 +592,7 @@ def needs_singularity_or_docker(test_item: MT) -> MT: return _mark_test('singularity', test_item) else: return needs_docker(test_item) - + def needs_local_cuda(test_item: MT) -> MT: """ Use as a decorator before test classes or methods to only run them if diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index e7ea736164..2d4eb50534 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -130,7 +130,7 @@ def test_giraffe_deepvariant(self): @slow @needs_singularity_or_docker def test_giraffe(self): - """Test if Giraffe runs. This could take 12 minutes. Also we scale it down.""" + """Test if Giraffe runs. This could take 12 minutes. Also we scale it down but it still demands lots of memory.""" # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't. json_dir = self._createTempDir() diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index e36d043e9e..d5495a2f2c 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -709,6 +709,9 @@ def import_file_from_uri(uri: str) -> str: tried.append(candidate_uri) try: imported = toil.import_file(candidate_uri) + except FileNotFoundError as e: + # This isn't here. + imported = None except UnimplementedURLException as e: # We can't find anything that can even support this URL scheme. # Report to the user, they are probably missing an extra. @@ -721,6 +724,7 @@ def import_file_from_uri(uri: str) -> str: raise if imported is None: # Wasn't found there + logger.info('Looked for %s at %s but did not find it', uri, candidate_uri) continue logger.info('Imported %s', candidate_uri) @@ -1389,7 +1393,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Actually execute the workflow nodes. 
""" - + # Combine the bindings we get from previous jobs current_bindings = combine_bindings(unwrap_all(self._prev_node_results)) # Set up the WDL standard library @@ -1552,8 +1556,9 @@ def leaves(self) -> List[str]: leaves = set(self._nodes.keys()) for node_id in self._nodes.keys(): for dependency in self.get_dependencies(node_id): - # Mark everything depended on as not a leaf - leaves.remove(dependency) + if dependency in leaves: + # Mark everything depended on as not a leaf + leaves.remove(dependency) return list(leaves) @@ -1598,7 +1603,7 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: # To make Toil jobs, we need all the jobs they depend on made so we can # call .rv(). So we need to solve the workflow DAG ourselves to set it up # properly. - + wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {} creation_order = section_graph.topological_order() @@ -1608,7 +1613,7 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: for node_id in creation_order: # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. prev_jobs = [wdl_id_to_toil_job[prev_node_id] for prev_node_id in section_graph.get_dependencies(node_id)] - + logger.debug('Make Toil job for %s', node_id) rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs] @@ -1630,7 +1635,7 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: # Save the job wdl_id_to_toil_job[node_id] = job - + # Find all the leaves leaf_ids = section_graph.leaves() From d7664ed8e2865ca388a643c152270881182315c3 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 15 Jun 2023 12:42:30 -0400 Subject: [PATCH 05/20] Hook up node coalescing --- src/toil/wdl/wdltoil.py | 144 +++++++++++++++++++++++++++++++++++----- 1 file changed, 128 insertions(+), 16 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index d5495a2f2c..a535857027 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -1508,9 +1508,10 @@ def get(self, node_id: str) -> WDL.Tree.WorkflowNode: """ return self._nodes[self.real_id(node_id)] - def get_dependencies(self, node_id: str) -> List[str]: + def get_dependencies(self, node_id: str) -> Set[str]: """ - Get all the nodes that a node depends on. + Get all the nodes that a node depends on, recursively (into the node if + it has a body) but not transitively. Produces dependencies after resolving gathers and internal-to-section dependencies, on nodes that are also in this graph. @@ -1535,8 +1536,32 @@ def get_dependencies(self, node_id: str) -> List[str]: if real_dependency in self._nodes: dependencies.add(real_dependency) - return list(dependencies) + def get_transitive_dependencies(self, node_id: str) -> Set[str]: + """ + Get all the nodes that a node depends on, transitively. + """ + dependencies = set() + visited = set() + queue = [node_id] + + while len(queue) > 0: + # Grab the enxt thing off the queue + here = queue[-1] + queue.pop() + if here in visited: + # Skip if we got it already + continue + # Get all its dependencies + here_deps = self.get_dependencies(here) + dependencies += here_deps + for dep in here_deps: + if dep not in visited: + # And queue all the ones we haven't visited. + queue.append(dep) + + return dependencies + def topological_order(self) -> List[str]: """ Get a topological order of the nodes, based on their dependencies. 
@@ -1575,6 +1600,74 @@ def __init__(self, namespace: str, **kwargs: Any) -> None: super().__init__(**kwargs) self._namespace = namespace + def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> coalesced: List[List[str]]: + """ + Given a topological order of WDL workflow node IDs, produce a list of + lists of IDs, still in topological order, where each list of IDs can be + run under a single Toil job. + """ + + # All the buckets of merged nodes + to_return: List[List[str]] = [] + # The nodes we are currently merging, in topological order + current_bucket: List[str] = [] + # All the non-decl transitive dependencies of nodes in the bucket + current_bucket_dependencies: Set[str] = set() + + for next_id in order: + # Consider adding each node to the bucket + # Get all the dependencies on things that aren't decls. + next_dependencies = {dep for dep in section_graph.get_transitive_dependencies(next_id) if not isinstance(section_graph.get(dep), WDL.Tree.Decl} + if len(current_bucket) == 0: + # This is the first thing for the bucket + current_bucket.append(next_id) + current_bucket_dependencies |= next_dependencies + else: + # Get a node already in the bucket + current_id = current_bucket[0] + current_node = section_graph.get(current_id) + + # And the node to maybe add + next_node = section_graph.get(next_id) + + if not isinstance(current_node, WDL.Tree.Decl) or not isinstance(next_node, WDL.Tree.Decl): + # We can only combine decls with decls, so we can't go in + # the bucket. + + # Finish the bucket. + to_return.append(current_bucket) + # Start a new one with this next node + current_bucket = [next_id] + current_bucket_dependencies = next_dependencies + else: + # We have a decl in the bucket and a decl we could maybe + # add. We know they are part of the same section, so we + # aren't jumping in and out of conditionals or scatters. + + # We are going in a topological order, so we know the + # bucket can't depend on the new node. + + if next_dependencies == current_bucket_dependencies: + # We can add this node without adding more dependencies on non-decls on either side. + # Nothing in the bucket can be in the dependency set because the bucket is only decls. + # Put it in + current_bucket.append(next_id) + # TODO: With this condition, this is redundant. + current_bucket_dependencies |= next_dependencies + else: + # Finish the bucket. + to_return.append(current_bucket) + # Start a new one with this next node + current_bucket = [next_id] + current_bucket_dependencies = next_dependencies + + # Now finish the last bucket + to_return.append(current_bucket) + + return to_return + + + def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> Job: """ Make a Toil job to evaluate a subgraph inside a workflow or workflow @@ -1607,21 +1700,29 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {} creation_order = section_graph.topological_order() - logger.debug('Creation order: %s', creation_order) - for node_id in creation_order: + # Now we want to organize the linear list of nodes into collections of nodes that can be in the same Toil job. + creation_jobs = self.coalesce_nodes(creation_order, section_graph) + logger.debug('Creation jobs: %s', creation_jobs) + + for node_ids in creation_jobs: # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. 
- prev_jobs = [wdl_id_to_toil_job[prev_node_id] for prev_node_id in section_graph.get_dependencies(node_id)] + prev_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id)} + prev_jobs = [wdl_id_to_toil_job[prev_node_id] for prev_node_id in prev_ids] - logger.debug('Make Toil job for %s', node_id) + logger.debug('Make Toil job for %s', node_ids) rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs] # We also need access to section-level bindings like inputs rvs.append(environment) - - # Use them to make a new job - job = WDLWorkflowNodeJob(section_graph.get(node_id), rvs, self._namespace) + + if len(node_ids) == 1: + # Make a one-node job + job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace) + else: + # Make a multi-node job + job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace) for prev_job in prev_jobs: # Connect up the happens-after relationships to make sure the # return values are available. @@ -1632,15 +1733,26 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: if len(prev_jobs) == 0: # Nothing came before this job, so connect it to the workflow. self.addChild(job) - - # Save the job - wdl_id_to_toil_job[node_id] = job + + for node_id in node_ids: + # Save the job + wdl_id_to_toil_job[node_id] = job # Find all the leaves leaf_ids = section_graph.leaves() + # Get a deduplicated list of TOil jobs that contain leaf nodes. + # TODO: Really we want the ones containign *only* leaf nodes. + leaf_jobs = [] + leaf_job_object_ids = set() + for leaf_id in leaf_ids: + leaf_job = wdl_id_to_toil_job[leaf_id] + if id(leaf_job) not in leaf_job_object_ids: + leaf_jobs.append(leaf_job) + leaf_job_object_ids.add(id(leaf_job)) + # Make the sink job - leaf_rvs: List[Union[WDLBindings, Promise]] = [wdl_id_to_toil_job[node_id].rv() for node_id in leaf_ids] + leaf_rvs: List[Union[WDLBindings, Promise]] = [leaf_job.rv() for leaf_job in leaf_jobs] # Make sure to also send the section-level bindings leaf_rvs.append(environment) # And to fill in bindings from code not executed in this instantiation @@ -1652,9 +1764,9 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: ) # It runs inside us self.addChild(sink) - for node_id in leaf_ids: + for leaf_job in leaf_jobs: # And after all the leaf jobs. - wdl_id_to_toil_job[node_id].addFollowOn(sink) + leaf_job.addFollowOn(sink) return sink From 21d49dcea474d146cd8a586af010b5890b705cb4 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 22 Jun 2023 12:08:19 -0400 Subject: [PATCH 06/20] Make the leaf Toil jobs the ones that are only leaf nodes --- src/toil/wdl/wdltoil.py | 51 ++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index a535857027..777b8038f2 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -1697,7 +1697,32 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: # call .rv(). So we need to solve the workflow DAG ourselves to set it up # properly. + # We also need to be able to map back and forth between the WDL workflow nodes and the Toil jobs that run them wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {} + toil_id_to_wdl_ids = {} + + def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: + """ + Get the distinct Toil jobs executing any of the given WDL nodes. 
+ """ + job_ids = set() + jobs = [] + for job in (wdl_id_to_toil_job[wdl_id] for wdl_id in wdl_ids): + # For each job that is registered under any of these WDL IDs + if job.jobStoreID not in job_ids: + # If we haven't taken it already, take it + job_ids.add(job.jobStoreID) + jobs.append(job) + return jobs + + def get_job_set_only(wdl_ids: Set[str]) -> List[WDLBaseJob]: + """ + Get the distinct Toil jobs executing only the given WDL nodes and + no others. + """ + # Find the jobs that run any of the WDL nodes, and throw out the ones that run other stuff + return [job for job in get_job_set_any(wdl_ids) if any(wdl_id not in wdl_ids for wdl_id in toil_id_to_wdl_ids[job.jobStoreID])] + creation_order = section_graph.topological_order() logger.debug('Creation order: %s', creation_order) @@ -1708,11 +1733,10 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: for node_ids in creation_jobs: # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. - prev_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id)} - prev_jobs = [wdl_id_to_toil_job[prev_node_id] for prev_node_id in prev_ids] - - logger.debug('Make Toil job for %s', node_ids) + prev_node_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id)} + logger.debug('Make Toil job for %s', prev_node_ids) + prev_jobs = get_job_set_any(prev_node_ids) rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs] # We also need access to section-level bindings like inputs rvs.append(environment) @@ -1735,22 +1759,13 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: self.addChild(job) for node_id in node_ids: - # Save the job + # Save the job for everything it executes wdl_id_to_toil_job[node_id] = job + # And remember everything it executes + toil_id_to_wdl_ids[job.jobStoreID] = set(node_ids) - # Find all the leaves - leaf_ids = section_graph.leaves() - - # Get a deduplicated list of TOil jobs that contain leaf nodes. - # TODO: Really we want the ones containign *only* leaf nodes. - leaf_jobs = [] - leaf_job_object_ids = set() - for leaf_id in leaf_ids: - leaf_job = wdl_id_to_toil_job[leaf_id] - if id(leaf_job) not in leaf_job_object_ids: - leaf_jobs.append(leaf_job) - leaf_job_object_ids.add(id(leaf_job)) - + # Get a deduplicated list of Toil jobs that execute only leaf nodes. + leaf_jobs = get_job_set_only(section_graph.leaves()) # Make the sink job leaf_rvs: List[Union[WDLBindings, Promise]] = [leaf_job.rv() for leaf_job in leaf_jobs] # Make sure to also send the section-level bindings From db3a353284e7dd1bbbc001eeeb82b8776b126671 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 22 Jun 2023 13:01:01 -0400 Subject: [PATCH 07/20] Implement a postprocessing mini-language --- src/toil/wdl/wdltoil.py | 229 +++++++++++++++++++++++++--------------- 1 file changed, 142 insertions(+), 87 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 777b8038f2..094a86f7f5 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -863,6 +863,11 @@ def map_over_typed_files_in_value(value: WDL.Value.Base, transform: Callable[[WD class WDLBaseJob(Job): """ Base job class for all WDL-related jobs. + + Responsible for post-processing returned bindings, to do things like add in + null values for things not defined in a section. 
Post-processing operations + can be added onto any job before it is saved, and will be applied as long + as the job's run method calls postprocess(). """ def __init__(self, **kwargs: Any) -> None: @@ -887,6 +892,11 @@ def __init__(self, **kwargs: Any) -> None: # TODO: Make sure C-level stack size is also big enough for this. sys.setrecursionlimit(10000) + # We need an ordered list of postprocessing steps to apply, because we + # may ahve coalesced postprocessing steps deferred by several levels of + # jobs returing other jobs' promised RVs. + self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = [] + # TODO: We're not allowed by MyPy to override a method and widen the return # type, so this has to be Any. def run(self, file_store: AbstractFileStore) -> Any: @@ -898,6 +908,77 @@ def run(self, file_store: AbstractFileStore) -> Any: # bindings are actually linked lists or something? sys.setrecursionlimit(10000) + def then_underlay(self, underlay: Promised[WDLBindings]) -> None: + """ + Apply an underlay of backup bindings to the result. + """ + self._postprocessing_steps.append(("underlay", underlay)) + + def then_remove(self, remove: Promised[WDLBindings]) -> None: + """ + Remove the given bindings from the result. + """ + self._postprocessing_steps.append(("remove", remove)) + + def then_namespace(self, namespace: str) -> None: + """ + Put the result bindings into a namespace. + """ + self._postprocessing_steps.append(("namespace", namespace)) + + def then_overlay(self, overlay: Promised[WDLBindings]) -> None: + """ + Overlay the given bindings on top of the (possibly namespaced) result. + """ + self._postprocessing_steps.append(("overlay", overlay)) + + def postprocess(self, bindings: WDLBindings) -> WDLBindings: + """ + Apply queued changes to bindings. + + Should be applied by subclasses' run() implementations to their return + values. + """ + + for action, argument in self._postprocessing_steps: + # Interpret the mini language of postprocessing steps. + # These are too small to justify being their own separate jobs. + if action == "underlay": + if not isinstance(argument, WDLBindings): + raise RuntimeError("Wrong postprocessing argument type") + # We want to apply values from the underlay if not set in the bindings + bindings = combine_bindings([bindings, argument.subtract(bindings)]) + elif action == "remove": + if not isinstance(argument, WDLBindings): + raise RuntimeError("Wrong postprocessing argument type") + # We need to take stuff out of scope + bindings = bindings.subtract(argument) + elif action == "namespace": + if not isinstance(argument, str): + raise RuntimeError("Wrong postprocessing argument type") + # We are supposed to put all our results in a namespace + bindings = bindings.wrap_namespace(argument) + elif action == "overlay": + if not isinstance(argument, WDLBindings): + raise RuntimeError("Wrong postprocessing argument type") + # We want to apply values from the overlay over the bindings + bindings = combine_bindings([bindings.subtract(argument), argument]) + else: + raise RuntimeError(f"Unknown postprocessing action {action}") + + return bindings + + def defer_postprocessing(self, other: WDLBaseJob) -> None: + """ + Give our postprocessing steps to a different job. + + Use this when you are returning a promise for bindings, on the job that issues the promise. + """ + + other._postprocess_steps += self._postprocessing_steps + self._postprocessing_steps = [] + + class WDLTaskJob(WDLBaseJob): """ Job that runs a WDL task. 
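# --- Illustrative sketch, not part of the patch ---
# A self-contained analogue of the postprocessing mini-language above, with
# plain dicts standing in for WDL.Env.Bindings. Steps queued by the then_*()
# methods are replayed in order by postprocess(); defer_postprocessing()
# simply hands the queued steps to whichever job's promise actually gets
# returned, so they still run against the real result bindings.
from typing import Any, Dict, List, Tuple

def postprocess(bindings: Dict[str, Any], steps: List[Tuple[str, Any]]) -> Dict[str, Any]:
    for action, argument in steps:
        if action == "underlay":
            # Fill in values the result did not define
            bindings = {**argument, **bindings}
        elif action == "remove":
            # Drop bindings that should leave scope
            bindings = {k: v for k, v in bindings.items() if k not in argument}
        elif action == "namespace":
            # Wrap every result under a namespace prefix
            bindings = {f"{argument}.{k}": v for k, v in bindings.items()}
        elif action == "overlay":
            # Apply the argument's values over the result
            bindings = {**bindings, **argument}
    return bindings

# e.g. a call's outputs being namespaced and merged back into the caller's scope:
print(postprocess({"out": 1}, [("namespace", "my_call"), ("overlay", {"x": 2})]))
# -> {'my_call.out': 1, 'x': 2}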
@@ -1317,7 +1398,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: # This is a variable assignment logger.info('Setting %s to %s', self._node.name, self._node.expr) value = evaluate_decl(self._node, incoming_bindings, standard_library) - return incoming_bindings.bind(self._node.name, value) + return self.postprocess(incoming_bindings.bind(self._node.name, value)) elif isinstance(self._node, WDL.Tree.Call): # This is a call of a task or workflow @@ -1344,26 +1425,23 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: raise WDL.Error.InvalidType(self._node, "Cannot call a " + str(type(self._node.callee))) # We need to agregate outputs namespaced with our node name, and existing bindings - namespace_job = WDLNamespaceBindingsJob(self._node.name, [subjob.rv()]) - subjob.addFollowOn(namespace_job) - self.addChild(namespace_job) - - combine_job = WDLCombineBindingsJob([namespace_job.rv(), incoming_bindings]) - namespace_job.addFollowOn(combine_job) - self.addChild(combine_job) - - return combine_job.rv() + subjob.then_namespace(self._node.name) + subjob.then_overlay(incoming_bindings) + self.defer_postprocessing(subjob) + return subjob.rv() elif isinstance(self._node, WDL.Tree.Scatter): subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace) self.addChild(subjob) # Scatters don't really make a namespace, just kind of a scope? # TODO: Let stuff leave scope! + self.defer_postprocessing(subjob) return subjob.rv() elif isinstance(self._node, WDL.Tree.Conditional): subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace) self.addChild(subjob) # Conditionals don't really make a namespace, just kind of a scope? # TODO: Let stuff leave scope! + self.defer_postprocessing(subjob) return subjob.rv() else: raise WDL.Error.InvalidType(self._node, "Unimplemented WorkflowNode: " + str(type(self._node))) @@ -1408,7 +1486,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: else: raise WDL.Error.InvalidType(node, "Unimplemented WorkflowNode: " + str(type(node))) - return current_bindings + return self.postprocess(current_bindings) class WDLCombineBindingsJob(WDLBaseJob): @@ -1417,7 +1495,7 @@ class WDLCombineBindingsJob(WDLBaseJob): environment changes. """ - def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]], underlay: Optional[Promised[WDLBindings]] = None, remove: Optional[Promised[WDLBindings]] = None, **kwargs: Any) -> None: + def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]], **kwargs: Any) -> None: """ Make a new job to combine the results of previous jobs. @@ -1428,8 +1506,6 @@ def __init__(self, prev_node_results: Sequence[Promised[WDLBindings]], underlay: super().__init__(**kwargs) self._prev_node_results = prev_node_results - self._underlay = underlay - self._remove = remove def run(self, file_store: AbstractFileStore) -> WDLBindings: """ @@ -1437,34 +1513,8 @@ def run(self, file_store: AbstractFileStore) -> WDLBindings: """ super().run(file_store) combined = combine_bindings(unwrap_all(self._prev_node_results)) - if self._underlay is not None: - # Fill in from the underlay anything not defined in anything else. - combined = combine_bindings([combined, unwrap(self._underlay).subtract(combined)]) - if self._remove is not None: - # We need to take stuff out of scope - combined = combined.subtract(unwrap(self._remove)) - return combined - -class WDLNamespaceBindingsJob(WDLBaseJob): - """ - Job that puts a set of bindings into a namespace. 
- """ - - def __init__(self, namespace: str, prev_node_results: Sequence[Promised[WDLBindings]], **kwargs: Any) -> None: - """ - Make a new job to namespace results. - """ - super().__init__(**kwargs) - - self._namespace = namespace - self._prev_node_results = prev_node_results - - def run(self, file_store: AbstractFileStore) -> WDLBindings: - """ - Apply the namespace - """ - super().run(file_store) - return combine_bindings(unwrap_all(self._prev_node_results)).wrap_namespace(self._namespace) + # Make sure to run the universal postprocessing steps + return self.postprocess(combined) class WDLWorkflowGraph: """ @@ -1697,10 +1747,12 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: # call .rv(). So we need to solve the workflow DAG ourselves to set it up # properly. - # We also need to be able to map back and forth between the WDL workflow nodes and the Toil jobs that run them + # When a WDL node depends on another, we need to be able to find the Toil job we need an rv from. wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {} - toil_id_to_wdl_ids = {} - + # We need the set of Toil jobs not depended on so we can wire them up to the sink. + # This maps from Toil job store ID to job. + toil_leaves = {} + def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: """ Get the distinct Toil jobs executing any of the given WDL nodes. @@ -1715,15 +1767,6 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: jobs.append(job) return jobs - def get_job_set_only(wdl_ids: Set[str]) -> List[WDLBaseJob]: - """ - Get the distinct Toil jobs executing only the given WDL nodes and - no others. - """ - # Find the jobs that run any of the WDL nodes, and throw out the ones that run other stuff - return [job for job in get_job_set_any(wdl_ids) if any(wdl_id not in wdl_ids for wdl_id in toil_id_to_wdl_ids[job.jobStoreID])] - - creation_order = section_graph.topological_order() logger.debug('Creation order: %s', creation_order) @@ -1735,8 +1778,15 @@ def get_job_set_only(wdl_ids: Set[str]) -> List[WDLBaseJob]: # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. prev_node_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id)} logger.debug('Make Toil job for %s', prev_node_ids) - + + # Get the Toil jobs we depend on prev_jobs = get_job_set_any(prev_node_ids) + for prev_job in prev_jobs: + if prev_job.jobStoreID in toil_leaves: + # Mark them all as depended on + del toil_leaves[prev_job.jobStoreID] + + # Get their return values to feed into the new job rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs] # We also need access to section-level bindings like inputs rvs.append(environment) @@ -1761,28 +1811,33 @@ def get_job_set_only(wdl_ids: Set[str]) -> List[WDLBaseJob]: for node_id in node_ids: # Save the job for everything it executes wdl_id_to_toil_job[node_id] = job - # And remember everything it executes - toil_id_to_wdl_ids[job.jobStoreID] = set(node_ids) - - # Get a deduplicated list of Toil jobs that execute only leaf nodes. - leaf_jobs = get_job_set_only(section_graph.leaves()) - # Make the sink job - leaf_rvs: List[Union[WDLBindings, Promise]] = [leaf_job.rv() for leaf_job in leaf_jobs] - # Make sure to also send the section-level bindings - leaf_rvs.append(environment) - # And to fill in bindings from code not executed in this instantiation - # with Null, and filter out stuff that should leave scope. 
- sink = WDLCombineBindingsJob( - leaf_rvs, - underlay=self.make_gather_bindings(gather_nodes, WDL.Value.Null()), - remove=local_environment - ) - # It runs inside us - self.addChild(sink) - for leaf_job in leaf_jobs: - # And after all the leaf jobs. - leaf_job.addFollowOn(sink) + + # It isn't depended on yet + toil_leaves[job.jobStoreID] = job + if len(toil_leaves) == 1: + # There's one final node so we can just tack postprocessing onto that. + sink: WDLBaseJob = next(toil_leaves.values()) + else: + # We need to bring together with a new sink + # Make the sink job to collect all their results. + leaf_rvs: List[Union[WDLBindings, Promise]] = [leaf_job.rv() for leaf_job in toil_leaves.values()] + # Make sure to also send the section-level bindings + leaf_rvs.append(environment) + # And to fill in bindings from code not executed in this instantiation + # with Null, and filter out stuff that should leave scope. + sink: WDLBaseJob = WDLCombineBindingsJob(leaf_rvs) + # It runs inside us + self.addChild(sink) + for leaf_job in toil_leaves.values(): + # And after all the leaf jobs. + leaf_job.addFollowOn(sink) + + + # Apply the final postprocessing for leaving the section. + sink.then_underlay(self.make_gather_bindings(gather_nodes, WDL.Value.Null())) + sink.then_remove(local_environment) + return sink def make_gather_bindings(self, gathers: Sequence[WDL.Tree.Gather], undefined: WDL.Value.Base) -> WDLBindings: @@ -1906,6 +1961,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: self.addChild(gather_job) for j in scatter_jobs: j.addFollowOn(gather_job) + self.defer_postprocessing(gather_job) return gather_job.rv() class WDLArrayBindingsJob(WDLBaseJob): @@ -1963,7 +2019,7 @@ def run(self, file_store: AbstractFileStore) -> WDLBindings: result = result.bind(name, WDL.Value.Array(supertype, [env.resolve(name) if env.has_binding(name) else WDL.Value.Null() for env in new_bindings])) # Base bindings are already included so return the result - return result + return self.postprocess(result) class WDLConditionalJob(WDLSectionJob): """ @@ -2005,13 +2061,14 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: logger.info('Condition is true') # Run the body and return its effects body_job = self.create_subgraph(self._conditional.body, list(self._conditional.gathers.values()), bindings) + self.defer_postprocessing(body_job) return body_job.rv() else: logger.info('Condition is false') # Return the input bindings and null bindings for all our gathers. # Should not collide at all. gather_bindings = self.make_gather_bindings(list(self._conditional.gathers.values()), WDL.Value.Null()) - return combine_bindings([bindings, gather_bindings]) + return self.postprocess(combine_bindings([bindings, gather_bindings])) class WDLWorkflowJob(WDLSectionJob): """ @@ -2069,11 +2126,12 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: # Add evaluating the outputs after the sink outputs_job = WDLOutputsJob(self._workflow.outputs, sink.rv()) sink.addFollowOn(outputs_job) - # Caller takes care of namespacing the result + # Caller is responsible for making sure namespaces are applied + self.defer_postprocessing(outputs_job) return outputs_job.rv() else: # No outputs from this workflow. 
- return WDL.Env.Bindings() + return self.postprocess(WDL.Env.Bindings()) class WDLOutputsJob(WDLBaseJob): """ @@ -2103,7 +2161,7 @@ def run(self, file_store: AbstractFileStore) -> WDLBindings: for output_decl in self._outputs: output_bindings = output_bindings.bind(output_decl.name, evaluate_decl(output_decl, unwrap(self._bindings), standard_library)) - return output_bindings + return self.postprocess(output_bindings) class WDLRootJob(WDLSectionJob): """ @@ -2132,13 +2190,10 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: # Run the workflow. We rely in this to handle entering the input # namespace if needed, or handling free-floating inputs. workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace) + workflow_job.then_namespace(self._namespace) self.addChild(workflow_job) - - # And namespace its outputs - namespace_job = WDLNamespaceBindingsJob(self._namespace, [workflow_job.rv()]) - workflow_job.addFollowOn(namespace_job) - - return namespace_job.rv() + self.defer_postprocessing(workflow_job) + return workflow_job.rv() def main() -> None: """ From 2def6df8408a1ab36aa2dfacc34b40064f8c3148 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 22 Jun 2023 13:34:47 -0400 Subject: [PATCH 08/20] Fix syntax --- src/toil/wdl/wdltoil.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 094a86f7f5..4403cfd133 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -968,7 +968,7 @@ def postprocess(self, bindings: WDLBindings) -> WDLBindings: return bindings - def defer_postprocessing(self, other: WDLBaseJob) -> None: + def defer_postprocessing(self, other: "WDLBaseJob") -> None: """ Give our postprocessing steps to a different job. @@ -1650,7 +1650,7 @@ def __init__(self, namespace: str, **kwargs: Any) -> None: super().__init__(**kwargs) self._namespace = namespace - def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> coalesced: List[List[str]]: + def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]: """ Given a topological order of WDL workflow node IDs, produce a list of lists of IDs, still in topological order, where each list of IDs can be @@ -1667,7 +1667,7 @@ def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> coalesc for next_id in order: # Consider adding each node to the bucket # Get all the dependencies on things that aren't decls. - next_dependencies = {dep for dep in section_graph.get_transitive_dependencies(next_id) if not isinstance(section_graph.get(dep), WDL.Tree.Decl} + next_dependencies = {dep for dep in section_graph.get_transitive_dependencies(next_id) if not isinstance(section_graph.get(dep), WDL.Tree.Decl)} if len(current_bucket) == 0: # This is the first thing for the bucket current_bucket.append(next_id) From ebffa60392b79d95bf012110f96ee9913f833313 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 22 Jun 2023 19:02:22 -0400 Subject: [PATCH 09/20] Fix enough semantics that evaluation can actually fail --- src/toil/wdl/wdltoil.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 4403cfd133..936dc6b4ea 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -975,7 +975,7 @@ def defer_postprocessing(self, other: "WDLBaseJob") -> None: Use this when you are returning a promise for bindings, on the job that issues the promise. 
""" - other._postprocess_steps += self._postprocessing_steps + other._postprocessing_steps += self._postprocessing_steps self._postprocessing_steps = [] @@ -1586,6 +1586,8 @@ def get_dependencies(self, node_id: str) -> Set[str]: if real_dependency in self._nodes: dependencies.add(real_dependency) + return dependencies + def get_transitive_dependencies(self, node_id: str) -> Set[str]: """ Get all the nodes that a node depends on, transitively. @@ -1604,7 +1606,7 @@ def get_transitive_dependencies(self, node_id: str) -> Set[str]: continue # Get all its dependencies here_deps = self.get_dependencies(here) - dependencies += here_deps + dependencies |= here_deps for dep in here_deps: if dep not in visited: # And queue all the ones we haven't visited. @@ -1650,7 +1652,7 @@ def __init__(self, namespace: str, **kwargs: Any) -> None: super().__init__(**kwargs) self._namespace = namespace - def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]: + def coalesce_nodes(self, order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]: """ Given a topological order of WDL workflow node IDs, produce a list of lists of IDs, still in topological order, where each list of IDs can be @@ -1817,7 +1819,7 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: if len(toil_leaves) == 1: # There's one final node so we can just tack postprocessing onto that. - sink: WDLBaseJob = next(toil_leaves.values()) + sink: WDLBaseJob = next(iter(toil_leaves.values())) else: # We need to bring together with a new sink # Make the sink job to collect all their results. From 4f825a1056d728a8096cbe45df51b73a094e1448 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 6 Jul 2023 16:16:01 -0400 Subject: [PATCH 10/20] Get the md5sum test working --- src/toil/test/wdl/wdltoil_test.py | 2 +- src/toil/wdl/wdltoil.py | 33 ++++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index 2d4eb50534..77f44d50ec 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -33,7 +33,7 @@ def testMD5sum(self): wdl = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.1.0.wdl') json_file = os.path.abspath('src/toil/test/wdl/md5sum/md5sum.json') - result_json = subprocess.check_output(self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug']) + result_json = subprocess.check_output(self.base_command + [wdl, json_file, '-o', self.output_dir, '--logDebug', '--retryCount=0']) result = json.loads(result_json) assert 'ga4ghMd5.value' in result diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 936dc6b4ea..566e52ca5e 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -624,7 +624,7 @@ def evaluate_named_expression(context: Union[WDL.Error.SourceNode, WDL.Error.Sou except Exception: # If something goes wrong, dump. logger.exception("Expression evaluation failed for %s: %s", name, expression) - log_bindings(logger.exception, "Expression was evaluated in:", [environment]) + log_bindings(logger.error, "Expression was evaluated in:", [environment]) raise if expected_type: @@ -669,7 +669,7 @@ def evaluate_defaultable_decl(node: WDL.Tree.Decl, environment: WDLBindings, std except Exception: # If something goes wrong, dump. 
logger.exception("Evaluation failed for %s", node)
-        log_bindings(logger.exception, "Statement was evaluated in:", [environment])
+        log_bindings(logger.error, "Statement was evaluated in:", [environment])
         raise
 
 # TODO: make these stdlib methods???
@@ -912,24 +912,28 @@ def then_underlay(self, underlay: Promised[WDLBindings]) -> None:
         """
         Apply an underlay of backup bindings to the result.
         """
+        logger.debug("Underlay %s after %s", underlay, self)
         self._postprocessing_steps.append(("underlay", underlay))
 
     def then_remove(self, remove: Promised[WDLBindings]) -> None:
         """
         Remove the given bindings from the result.
         """
+        logger.debug("Remove %s after %s", remove, self)
         self._postprocessing_steps.append(("remove", remove))
 
     def then_namespace(self, namespace: str) -> None:
         """
         Put the result bindings into a namespace.
         """
+        logger.debug("Namespace %s after %s", namespace, self)
         self._postprocessing_steps.append(("namespace", namespace))
 
     def then_overlay(self, overlay: Promised[WDLBindings]) -> None:
         """
         Overlay the given bindings on top of the (possibly namespaced) result.
         """
+        logger.debug("Overlay %s after %s", overlay, self)
         self._postprocessing_steps.append(("overlay", overlay))
 
     def postprocess(self, bindings: WDLBindings) -> WDLBindings:
@@ -941,15 +945,18 @@ def postprocess(self, bindings: WDLBindings) -> WDLBindings:
         """
 
         for action, argument in self._postprocessing_steps:
+
+            logger.debug("Apply postprocessing step: (%s, %s)", action, argument)
+
             # Interpret the mini language of postprocessing steps.
             # These are too small to justify being their own separate jobs.
             if action == "underlay":
-                if not isinstance(argument, WDLBindings):
+                if not isinstance(argument, WDL.Env.Bindings):
                     raise RuntimeError("Wrong postprocessing argument type")
                 # We want to apply values from the underlay if not set in the bindings
                 bindings = combine_bindings([bindings, argument.subtract(bindings)])
             elif action == "remove":
-                if not isinstance(argument, WDLBindings):
+                if not isinstance(argument, WDL.Env.Bindings):
                     raise RuntimeError("Wrong postprocessing argument type")
                 # We need to take stuff out of scope
                 bindings = bindings.subtract(argument)
@@ -959,7 +966,7 @@ def postprocess(self, bindings: WDLBindings) -> WDLBindings:
                 # We are supposed to put all our results in a namespace
                 bindings = bindings.wrap_namespace(argument)
             elif action == "overlay":
-                if not isinstance(argument, WDLBindings):
+                if not isinstance(argument, WDL.Env.Bindings):
                     raise RuntimeError("Wrong postprocessing argument type")
                 # We want to apply values from the overlay over the bindings
                 bindings = combine_bindings([bindings.subtract(argument), argument])
@@ -978,6 +985,8 @@ def defer_postprocessing(self, other: "WDLBaseJob") -> None:
         other._postprocessing_steps += self._postprocessing_steps
         self._postprocessing_steps = []
 
+        logger.debug("Assigned postprocessing steps from %s to %s", self, other)
+
 
 class WDLTaskJob(WDLBaseJob):
     """
@@ -1161,6 +1170,10 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
             rescheduled = WDLTaskJob(self._task, self._prev_node_results, self._task_id, self._namespace, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators)
             # Run that as a child
             self.addChild(rescheduled)
+
+            # Give it our postprocessing steps
+            self.defer_postprocessing(rescheduled)
+
             # And return its result.
return rescheduled.rv() @@ -1362,6 +1375,9 @@ def patched_run_invocation(*args: Any, **kwargs: Any) -> List[str]: # Upload any files in the outputs if not uploaded already. Accounts for how relative paths may still need to be container-relative. output_bindings = virtualize_files(output_bindings, outputs_library) + # Do postprocessing steps to e.g. apply namespaces. + output_bindings = self.postprocess(output_bindings) + return output_bindings class WDLWorkflowNodeJob(WDLBaseJob): @@ -1779,7 +1795,7 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: for node_ids in creation_jobs: # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs. prev_node_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id)} - logger.debug('Make Toil job for %s', prev_node_ids) + logger.debug('Make Toil job for %s', node_ids) # Get the Toil jobs we depend on prev_jobs = get_job_set_any(prev_node_ids) @@ -1835,10 +1851,13 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: # And after all the leaf jobs. leaf_job.addFollowOn(sink) + logger.debug("Sink job is: %s", sink) + # Apply the final postprocessing for leaving the section. sink.then_underlay(self.make_gather_bindings(gather_nodes, WDL.Value.Null())) - sink.then_remove(local_environment) + if local_environment is not None: + sink.then_remove(local_environment) return sink From ea9b77fd4d6307362ce871cf937718b535fa069f Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 6 Jul 2023 16:55:57 -0400 Subject: [PATCH 11/20] Handle workflows with no body jobs --- src/toil/wdl/wdltoil.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 566e52ca5e..70a2f7d8da 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -1729,8 +1729,9 @@ def coalesce_nodes(self, order: List[str], section_graph: WDLWorkflowGraph) -> L current_bucket = [next_id] current_bucket_dependencies = next_dependencies - # Now finish the last bucket - to_return.append(current_bucket) + if len(current_bucket) > 0: + # Now finish the last bucket + to_return.append(current_bucket) return to_return From b6cfd132e2cba6a361e00dbcecd674483562586c Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 6 Jul 2023 17:32:46 -0400 Subject: [PATCH 12/20] Actually populate visited set, and fix MyPy --- src/toil/wdl/wdltoil.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 70a2f7d8da..acbc6ae9ce 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -44,7 +44,7 @@ import WDL.runtime.config from toil.common import Config, Toil, addOptions -from toil.job import AcceleratorRequirement, Job, JobFunctionWrappingJob, Promise, Promised, accelerators_fully_satisfy, parse_accelerator, unwrap, unwrap_all +from toil.job import AcceleratorRequirement, Job, JobFunctionWrappingJob, Promise, Promised, TemporaryID, accelerators_fully_satisfy, parse_accelerator, unwrap, unwrap_all from toil.fileStores import FileID from toil.fileStores.abstractFileStore import AbstractFileStore from toil.jobStores.abstractJobStore import AbstractJobStore, UnimplementedURLException @@ -1431,7 +1431,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: if isinstance(self._node.callee, WDL.Tree.Workflow): # This is a call of a workflow - subjob: Job = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], 
self._node.callee_id, f'{self._namespace}.{self._node.name}') + subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}') self.addChild(subjob) elif isinstance(self._node.callee, WDL.Tree.Task): # This is a call of a task @@ -1609,8 +1609,8 @@ def get_transitive_dependencies(self, node_id: str) -> Set[str]: Get all the nodes that a node depends on, transitively. """ - dependencies = set() - visited = set() + dependencies: Set[str] = set() + visited: Set[str] = set() queue = [node_id] while len(queue) > 0: @@ -1620,6 +1620,8 @@ def get_transitive_dependencies(self, node_id: str) -> Set[str]: if here in visited: # Skip if we got it already continue + # Mark it got + visited.add(here) # Get all its dependencies here_deps = self.get_dependencies(here) dependencies |= here_deps @@ -1737,7 +1739,7 @@ def coalesce_nodes(self, order: List[str], section_graph: WDLWorkflowGraph) -> L - def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> Job: + def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> WDLBaseJob: """ Make a Toil job to evaluate a subgraph inside a workflow or workflow section. @@ -1770,7 +1772,7 @@ def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: wdl_id_to_toil_job: Dict[str, WDLBaseJob] = {} # We need the set of Toil jobs not depended on so we can wire them up to the sink. # This maps from Toil job store ID to job. - toil_leaves = {} + toil_leaves: Dict[Union[str, TemporaryID], WDLBaseJob] = {} def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: """ @@ -1845,7 +1847,7 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]: leaf_rvs.append(environment) # And to fill in bindings from code not executed in this instantiation # with Null, and filter out stuff that should leave scope. - sink: WDLBaseJob = WDLCombineBindingsJob(leaf_rvs) + sink = WDLCombineBindingsJob(leaf_rvs) # It runs inside us self.addChild(sink) for leaf_job in toil_leaves.values(): From 390eac439f818754c5b718bb1113dce0da4869b0 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 11 Jul 2023 10:00:50 -0700 Subject: [PATCH 13/20] Add missing super run() --- src/toil/wdl/wdltoil.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index acbc6ae9ce..3ea6277e4e 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -1487,6 +1487,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: """ Actually execute the workflow nodes. 
""" + super().run(file_store) # Combine the bindings we get from previous jobs current_bindings = combine_bindings(unwrap_all(self._prev_node_results)) From 6bbd3760fadcda5529d0165103a0efebf1fbfea2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 20 Jul 2023 13:54:03 -0400 Subject: [PATCH 14/20] Quiet debugging --- src/toil/wdl/wdltoil.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index ce34f82191..b2133e0f0b 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -723,7 +723,6 @@ def import_file_from_uri(uri: str) -> str: raise if imported is None: # Wasn't found there - logger.info('Looked for %s at %s but did not find it', uri, candidate_uri) continue logger.info('Imported %s', candidate_uri) From ac283740aaacff9e592485362e0d66097ce7c0e4 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 20 Jul 2023 13:54:18 -0400 Subject: [PATCH 15/20] Restrict Docker CUDA test to Docker --- src/toil/test/wdl/wdltoil_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index 99dc689890..a9b3414798 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -85,9 +85,8 @@ def test_miniwdl_self_test(self): @slow @needs_docker_cuda - @needs_singularity_or_docker def test_giraffe_deepvariant(self): - """Test if Giraffe and CPU DeepVariant run. This could take 25 minutes.""" + """Test if Giraffe and GPU DeepVariant run. This could take 25 minutes.""" # TODO: enable test if nvidia-container-runtime and Singularity are installed but Docker isn't. json_dir = self._createTempDir() From ddc1f4de9b6ab3033f69713cd6cf8853ce44c5d4 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 27 Jul 2023 12:23:33 -0400 Subject: [PATCH 16/20] Fix spelling --- src/toil/wdl/wdltoil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index b2133e0f0b..595d151cea 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -891,8 +891,8 @@ def __init__(self, **kwargs: Any) -> None: sys.setrecursionlimit(10000) # We need an ordered list of postprocessing steps to apply, because we - # may ahve coalesced postprocessing steps deferred by several levels of - # jobs returing other jobs' promised RVs. + # may have coalesced postprocessing steps deferred by several levels of + # jobs returning other jobs' promised RVs. self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = [] # TODO: We're not allowed by MyPy to override a method and widen the return From e67eda24b580ee0972aa021f5d575da9bb4f9871 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 27 Jul 2023 13:16:52 -0400 Subject: [PATCH 17/20] Add testing for the decl coalescing logic --- src/toil/test/wdl/wdltoil_test.py | 97 +++++++++++++++++++++++++++++++ src/toil/wdl/wdltoil.py | 18 +++--- 2 files changed, 108 insertions(+), 7 deletions(-) diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index 8b7976d9a3..a7e3717bad 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -3,8 +3,10 @@ import shutil import subprocess import unittest +from unittest.mock import patch import uuid import zipfile +from typing import Any, Dict, List, Set from urllib.request import urlretrieve import pytest @@ -14,6 +16,8 @@ # Don't import the test case directly or pytest will test it again. 
import toil.test.wdl.toilwdlTest +from toil.wdl.wdltoil import WDLSectionJob, WDLWorkflowGraph + class WdlToilTest(toil.test.wdl.toilwdlTest.ToilWdlTest): """ Version of the old Toil WDL tests that tests the new MiniWDL-based implementation. @@ -181,5 +185,98 @@ def test_empty_file_path(self): assert retval != 0 assert b'Could not find' in stderr + def test_coalesce(self): + """ + Test if WDLSectionJob can coalesce WDL decls. + + White box test; will need to be changed or removed if the WDL interpreter changes. + """ + + all_decls: Set[str] = set() + all_deps: Dict[str, Set[str]] = {} + + def mock_is_decl(self: Any, node_id: str) -> bool: + return node_id in all_decls + + def mock_get_transitive_dependencies(self: Any, node_id: str) -> Set[str]: + return all_deps[node_id] + + # These are the only two methods coalesce_nodes calls. + # Can we enforce that somehow? + with patch.object(WDLWorkflowGraph, 'is_decl', mock_is_decl): + with patch.object(WDLWorkflowGraph, 'get_transitive_dependencies', mock_get_transitive_dependencies): + + with self.subTest(msg="Two unrelated decls can coalesce"): + # Set up two unrelated decls + all_decls = {"decl1", "decl2"} + all_deps = { + "decl1": set(), + "decl2": set() + } + + result = WDLSectionJob.coalesce_nodes(["decl1", "decl2"], WDLWorkflowGraph([])) + + # Make sure they coalesced + assert len(result) == 1 + assert "decl1" in result[0] + assert "decl2" in result[0] + + with self.subTest(msg="A decl will not coalesce with a non-decl"): + all_decls = {"decl"} + all_deps = { + "decl": set(), + "nondecl": set() + } + + result = WDLSectionJob.coalesce_nodes(["decl", "nondecl"], WDLWorkflowGraph([])) + + assert len(result) == 2 + assert len(result[0]) == 1 + assert len(result[1]) == 1 + + + with self.subTest(msg="Two adjacent decls with a common dependency can coalesce"): + all_decls = {"decl1", "decl2"} + all_deps = { + "decl1": {"base"}, + "decl2": {"base"}, + "base": set() + } + + result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([])) + + assert len(result) == 2 + assert "base" in result[0] + assert "decl1" in result[1] + assert "decl2" in result[1] + + with self.subTest(msg="Two adjacent decls with different dependencies will not coalesce"): + all_decls = {"decl1", "decl2"} + all_deps = { + "decl1": {"base"}, + "decl2": set(), + "base": set() + } + + result = WDLSectionJob.coalesce_nodes(["base", "decl1", "decl2"], WDLWorkflowGraph([])) + + assert len(result) == 3 + assert "base" in result[0] + + with self.subTest(msg="Two adjacent decls with different successors will coalesce"): + all_decls = {"decl1", "decl2"} + all_deps = { + "decl1": set(), + "decl2": set(), + "successor": {"decl2"} + } + + result = WDLSectionJob.coalesce_nodes(["decl1", "decl2", "successor"], WDLWorkflowGraph([])) + + assert len(result) == 2 + assert "decl1" in result[0] + assert "decl2" in result[0] + assert "successor" in result[1] + if __name__ == "__main__": unittest.main() # run all tests diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 595d151cea..dfb91652a0 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -1567,6 +1567,13 @@ def real_id(self, node_id: str) -> str: """ return self._gather_to_section.get(node_id, node_id) + def is_decl(self, node_id: str) -> bool: + """ + Return True if a node represents a WDL declaration, and false + otherwise. + """ + return isinstance(self.get(node_id), WDL.Tree.Decl) + def get(self, node_id: str) -> WDL.Tree.WorkflowNode: """ Get a node by ID. 
@@ -1669,7 +1676,8 @@ def __init__(self, namespace: str, **kwargs: Any) -> None: super().__init__(**kwargs) self._namespace = namespace - def coalesce_nodes(self, order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]: + @staticmethod + def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]: """ Given a topological order of WDL workflow node IDs, produce a list of lists of IDs, still in topological order, where each list of IDs can be @@ -1686,7 +1694,7 @@ def coalesce_nodes(self, order: List[str], section_graph: WDLWorkflowGraph) -> L for next_id in order: # Consider adding each node to the bucket # Get all the dependencies on things that aren't decls. - next_dependencies = {dep for dep in section_graph.get_transitive_dependencies(next_id) if not isinstance(section_graph.get(dep), WDL.Tree.Decl)} + next_dependencies = {dep for dep in section_graph.get_transitive_dependencies(next_id) if not section_graph.is_decl(dep)} if len(current_bucket) == 0: # This is the first thing for the bucket current_bucket.append(next_id) @@ -1694,12 +1702,8 @@ def coalesce_nodes(self, order: List[str], section_graph: WDLWorkflowGraph) -> L else: # Get a node already in the bucket current_id = current_bucket[0] - current_node = section_graph.get(current_id) - # And the node to maybe add - next_node = section_graph.get(next_id) - - if not isinstance(current_node, WDL.Tree.Decl) or not isinstance(next_node, WDL.Tree.Decl): + if not section_graph.is_decl(current_id) or not section_graph.is_decl(next_id): # We can only combine decls with decls, so we can't go in # the bucket. From c539c0ed29c5721e800b6432418d91d921a47923 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Thu, 27 Jul 2023 13:20:57 -0400 Subject: [PATCH 18/20] Improve test comments --- src/toil/test/wdl/wdltoil_test.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index a7e3717bad..fe0257882e 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -192,17 +192,30 @@ def test_coalesce(self): White box test; will need to be changed or removed if the WDL interpreter changes. """ + # Set up data structures for our fake workflow graph to pull from. + # This has all decl-type nodes all_decls: Set[str] = set() + # And this has all transitive dependencies for all nodes. all_deps: Dict[str, Set[str]] = {} def mock_is_decl(self: Any, node_id: str) -> bool: + """ + Replacement function to determine if a node is a decl or not. + """ return node_id in all_decls def mock_get_transitive_dependencies(self: Any, node_id: str) -> Set[str]: + """ + Replacement function to get all the transitive dependencies of a node. + """ return all_deps[node_id] - # These are the only two methods coalesce_nodes calls. - # Can we enforce that somehow? + # These are the only two methods coalesce_nodes calls, so we can + # replace them to ensure our graph is used without actually needing to + # make any WDL objects for it. + # + # If that changes, the test will need to change! Maybe then it will be + # worth extracting a base type for this interface. 
with patch.object(WDLWorkflowGraph, 'is_decl', mock_is_decl): with patch.object(WDLWorkflowGraph, 'get_transitive_dependencies', mock_get_transitive_dependencies): From 7e6e896b5227f4e2be5fd736e9282bee96add932 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Tue, 1 Aug 2023 12:44:34 -0400 Subject: [PATCH 19/20] Properly exclude dependencies on WDL nodes in the same Toil job --- src/toil/test/wdl/wdltoil_test.py | 4 ++-- src/toil/wdl/wdltoil.py | 30 ++++++++++++++++-------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index 2266cf9266..0d7abd4963 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -50,7 +50,7 @@ def test_conformance_tests_v10(self): p = subprocess.run(self.base_command + ["-v", "1.0", "-n", tests_to_run], capture_output=True) if p.returncode != 0: - print(p.stdout) + print(p.stdout.decode('utf-8', errors='replace')) p.check_returncode() @@ -61,7 +61,7 @@ def test_conformance_tests_v11(self): p = subprocess.run(self.base_command + ["-v", "1.1", "-n", tests_to_run], capture_output=True) if p.returncode != 0: - print(p.stdout) + print(p.stdout.decode('utf-8', errors='replace')) p.check_returncode() diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index a5850d5982..7cd6951a80 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -424,7 +424,7 @@ def __init__(self, file_store: AbstractFileStore): self.size = NonDownloadingSize(self) # Keep the file store around so we can access files. - self._file_store = file_store + self._file_store = file_store def _is_url(self, filename: str, schemes: List[str] = ['http:', 'https:', 's3:', 'gs:', TOIL_URI_SCHEME]) -> bool: """ @@ -511,14 +511,14 @@ def _devirtualize_filename(self, filename: str) -> str: """ Go from a virtualized WDL-side filename to a local disk filename. - Any WDL-side filenames which are paths will be paths in the container. + Any WDL-side filenames which are paths will be paths in the container. """ if self._is_url(filename): # We shouldn't have to deal with URLs here; we want to have exactly # two nicely stacked/back-to-back layers of virtualization, joined # on the out-of-container paths. raise RuntimeError(f"File {filename} is a URL but should already be an in-container-virtualized filename") - + # If this is a local path it will be in the container. Make sure we # use the out-of-container equivalent. result = self.container.host_path(filename) @@ -542,7 +542,7 @@ def _virtualize_filename(self, filename: str) -> str: self.container.add_paths([filename]) result = self.container.input_path_map[filename] - + logger.debug('Virtualized %s as WDL file %s', filename, result) return result @@ -1716,7 +1716,7 @@ def get_transitive_dependencies(self, node_id: str) -> Set[str]: queue.append(dep) return dependencies - + def topological_order(self) -> List[str]: """ Get a topological order of the nodes, based on their dependencies. @@ -1785,7 +1785,7 @@ def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[Li if not section_graph.is_decl(current_id) or not section_graph.is_decl(next_id): # We can only combine decls with decls, so we can't go in # the bucket. - + # Finish the bucket. 
to_return.append(current_bucket)
 
         return to_return
-    
+
 
     def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> WDLBaseJob:
@@ -1878,10 +1878,12 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]:
         logger.debug('Creation jobs: %s', creation_jobs)
 
         for node_ids in creation_jobs:
-            # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs.
-            prev_node_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id)}
             logger.debug('Make Toil job for %s', node_ids)
-            
+            # Collect the return values from previous jobs. Some nodes may have been inputs, without jobs.
+            # Don't include stuff in the current batch.
+            prev_node_ids = {prev_node_id for node_id in node_ids for prev_node_id in section_graph.get_dependencies(node_id) if prev_node_id not in node_ids}
+
+
             # Get the Toil jobs we depend on
             prev_jobs = get_job_set_any(prev_node_ids)
             for prev_job in prev_jobs:
@@ -1893,7 +1895,7 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]:
             rvs: List[Union[WDLBindings, Promise]] = [prev_job.rv() for prev_job in prev_jobs]
             # We also need access to section-level bindings like inputs
             rvs.append(environment)
-            
+
             if len(node_ids) == 1:
                 # Make a one-node job
                 job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace)
@@ -1910,11 +1912,11 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]:
             if len(prev_jobs) == 0:
                 # Nothing came before this job, so connect it to the workflow.
                 self.addChild(job)
-            
+
             for node_id in node_ids:
                 # Save the job for everything it executes
                 wdl_id_to_toil_job[node_id] = job
-            
+
             # It isn't depended on yet
             toil_leaves[job.jobStoreID] = job
 
@@ -1943,7 +1945,7 @@ def get_job_set_any(wdl_ids: Set[str]) -> List[WDLBaseJob]:
         sink.then_underlay(self.make_gather_bindings(gather_nodes, WDL.Value.Null()))
         if local_environment is not None:
             sink.then_remove(local_environment)
-        
+
         return sink
 
     def make_gather_bindings(self, gathers: Sequence[WDL.Tree.Gather], undefined: WDL.Value.Base) -> WDLBindings:

From bdd238513cd52af8066569b3016e472a1e7af72f Mon Sep 17 00:00:00 2001
From: Adam Novak
Date: Wed, 2 Aug 2023 11:04:23 -0400
Subject: [PATCH 20/20] Stop insisting the Flatcar feeds be up

Flatcar has failed me for the last time
---
 src/toil/test/lib/test_ec2.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/src/toil/test/lib/test_ec2.py b/src/toil/test/lib/test_ec2.py
index 200541ca9d..19b8a9f556 100644
--- a/src/toil/test/lib/test_ec2.py
+++ b/src/toil/test/lib/test_ec2.py
@@ -14,6 +14,7 @@
 import logging
 import os
 import pytest
+import urllib
 
 from toil.lib.aws.ami import (aws_marketplace_flatcar_ami_search,
                               get_flatcar_ami,
@@ -25,14 +26,16 @@
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.DEBUG)
 
-
 class FlatcarFeedTest(ToilTest):
-    """Test accessing the FLatcar AMI release feed, independent of the AWS API"""
-
+    """Test accessing the Flatcar AMI release feed, independent of the AWS API"""
+
+    # Note that we need to support getting an empty list back, because
+    # sometimes the Flatcar feeds are just down, and we can't fail CI at those
+    # times.
+ def test_parse_archive_feed(self): """Make sure we can get a Flatcar release from the Internet Archive.""" amis = list(flatcar_release_feed_amis('us-west-2', 'amd64', 'archive')) - self.assertTrue(len(amis) > 0) for ami in amis: self.assertEqual(len(ami), len('ami-02b46c73fed689d1c')) self.assertTrue(ami.startswith('ami-')) @@ -40,26 +43,17 @@ def test_parse_archive_feed(self): def test_parse_beta_feed(self): """Make sure we can get a Flatcar release from the beta channel.""" amis = list(flatcar_release_feed_amis('us-west-2', 'amd64', 'beta')) - self.assertTrue(len(amis) > 0) for ami in amis: self.assertEqual(len(ami), len('ami-02b46c73fed689d1c')) self.assertTrue(ami.startswith('ami-')) - # TODO: This will fail until https://github.com/flatcar/Flatcar/issues/962 is fixed - @pytest.mark.xfail def test_parse_stable_feed(self): """Make sure we can get a Flatcar release from the stable channel.""" amis = list(flatcar_release_feed_amis('us-west-2', 'amd64', 'stable')) - self.assertTrue(len(amis) > 0) for ami in amis: self.assertEqual(len(ami), len('ami-02b46c73fed689d1c')) self.assertTrue(ami.startswith('ami-')) - def test_bypass_stable_feed(self): - """Make sure we can either get or safely not get a Flatcar release from the stable channel.""" - list(flatcar_release_feed_amis('us-west-2', 'amd64', 'stable')) - # Ifd we get here we safely managed to iterate everything. - @needs_aws_ec2 class AMITest(ToilTest): @classmethod