lsst · TallJimbo · Sep 9, 2024 · Sep 7, 2024
diff --git a/data/SConscript b/data/SConscript
@@ -117,6 +117,8 @@ rc2 = (
         clobber_outputs=True,
     )
     .add("step2a", where="skymap='ci_mw'")
+    # Test fail-and-rescue workflow by configuring one task fail on the first
+    # try, and then recover from that with a new run and --skip-existing-in.
     .add("step2b", where="skymap='ci_mw' AND tract=0")
     .add("step2cde")
     .add(
@@ -125,18 +127,20 @@ rc2 = (
         fail=[
             # Configure one quantum to fail on the first try and succeed on the
             # second, simulating an out-of-memory failure and automatic retry.
-            '''assembleCoadd::"skymap='ci_mw' AND tract=0 AND patch=2 AND band='r'":6GB'''
+            """assembleCoadd::"skymap='ci_mw' AND tract=0 AND patch=2 AND band='r'":6GB"""
         ],
         auto_retry_mem=("4GB", "8GB"),
     )
     .add("step4")
-    .add("step5", where="skymap='ci_mw' AND tract=0")
+    .add(
+        "step5",
+        where="skymap='ci_mw' AND tract=0",
+        group="attempt1",
+        fail=['''consolidateForcedSourceTable::"skymap='ci_mw' AND tract=0"'''],
+    )
+    .add("step5", where="skymap='ci_mw' AND tract=0", group="attempt2", skip_existing_in_last=True)
     .add("step6")
     .add("step7")
-    # Test fail-and-rescue workflow by configuring one task fail on the first
-    # try, and then recover from that with a new run and --skip-existing-in.
-    .add("step8", group="attempt1", fail=['''analyzeObjectTableCore::"skymap='ci_mw' AND tract=0"'''])
-    .add("step8", group="attempt2", skip_existing_in_last=True)
     .finish()
 )
 state.targets["data"].extend(rc2)
@@ -175,8 +179,6 @@ prod = (
     .add("step4", group="r", where="band='r'")
     .add("step4", group="i", where="band='i'")
     .add("step7")
-    # step8 in DRP-Prod is not actually valid, because it has tasks that should
-    # be partitioned in different ways in order to scale up.  This is DM-39314.
     .finish()
 )
 state.targets["data"].extend(prod)

diff --git a/tests/test_rc2_outputs.py b/tests/test_rc2_outputs.py
@@ -106,8 +106,8 @@ def test_property_set_metadata_direct(self) -> None:
     def test_property_set_metadata_qbb(self) -> None:
         self.qbb.check_property_set_metadata(self)
 
-    def check_step8_rescue(self, helper: OutputRepoTests) -> None:
-        """Test that the fail-and-recover attempts in step8 worked as expected,
+    def check_step5_rescue(self, helper: OutputRepoTests) -> None:
+        """Test that the fail-and-recover attempts in step5 worked as expected,
         by running all tasks but one in the first attempt.
         """
         # This task should have failed in attempt1 and should have been
@@ -116,40 +116,40 @@ def check_step8_rescue(self, helper: OutputRepoTests) -> None:
             [
                 ref.run
                 for ref in set(
-                    helper.butler.registry.queryDatasets(get_mock_name("analyzeObjectTableCore_metadata"))
+                    helper.butler.registry.queryDatasets(
+                        get_mock_name("consolidateForcedSourceTable_metadata")
+                    )
                 )
             ],
-            ["HSC/runs/RC2/step8-attempt2"],
+            ["HSC/runs/RC2/step5-attempt2"],
         )
         # This task should have succeeded in attempt1 and should not have been
         # included in attempt2.
         self.assertCountEqual(
             [
                 ref.run
                 for ref in set(
-                    helper.butler.registry.queryDatasets(
-                        get_mock_name("analyzeObjectTableSurveyCore_metadata")
-                    )
+                    helper.butler.registry.queryDatasets(get_mock_name("transformForcedSourceTable_metadata"))
                 )
             ],
-            ["HSC/runs/RC2/step8-attempt1"],
+            ["HSC/runs/RC2/step5-attempt1"] * 4,  # one for each patch,
         )
 
-    def test_step8_rescue_direct(self) -> None:
-        self.check_step8_rescue(self.direct)
+    def test_step5_rescue_direct(self) -> None:
+        self.check_step5_rescue(self.direct)
         # The attempt1 QG should have quanta for both tasks (and others, but we
         # won't list them all to avoid breaking if new ones are added).
-        qg_1 = self.direct.get_quantum_graph("step8", "attempt1")
-        qg_2 = self.direct.get_quantum_graph("step8", "attempt2")
+        qg_1 = self.direct.get_quantum_graph("step5", "attempt1")
+        qg_2 = self.direct.get_quantum_graph("step5", "attempt2")
         tasks_with_quanta_1 = {q.taskDef.label for q in qg_1}
         tasks_with_quanta_2 = {q.taskDef.label for q in qg_2}
-        self.assertIn(get_mock_name("analyzeObjectTableCore"), tasks_with_quanta_1)
-        self.assertIn(get_mock_name("analyzeObjectTableCore"), tasks_with_quanta_2)
-        self.assertIn(get_mock_name("analyzeObjectTableSurveyCore"), tasks_with_quanta_1)
-        self.assertNotIn(get_mock_name("analyzeObjectTableSurveyCore"), tasks_with_quanta_2)
+        self.assertIn(get_mock_name("consolidateForcedSourceTable"), tasks_with_quanta_1)
+        self.assertIn(get_mock_name("consolidateForcedSourceTable"), tasks_with_quanta_2)
+        self.assertIn(get_mock_name("transformForcedSourceTable"), tasks_with_quanta_1)
+        self.assertNotIn(get_mock_name("transformForcedSourceTable"), tasks_with_quanta_2)
 
     def test_step8_rescue_qbb(self) -> None:
-        self.check_step8_rescue(self.qbb)
+        self.check_step5_rescue(self.qbb)
 
     def test_fgcm_refcats(self) -> None:
         """Test that FGCM does not get refcats that don't overlap any of its