From 9eda74be8dffd8fc7698fa073734cbebc1394187 Mon Sep 17 00:00:00 2001
From: Yusra AlSayyad <yusra@astro.princeton.edu>
Date: Mon, 1 Jul 2024 14:54:18 -0700
Subject: [PATCH] Remove step8 and move its analysis tasks upstream

Now that we're running RC2/DC2s with the sasquatch butler for all
steps, we can move the analysis tasks to the step which creates
their input data products.

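Move source table consolidation (and, for HSC RC2,
writeRecalibratedSourceTable and transformSourceTable) to the global
step2 stage. Rename the per-visit sourceTable bps cluster to
finalizeSourceTable and extend it to also run finalizeCharacterization,
updateVisitSummary, and consolidateSourceTable alongside
[writeRecalibrated|transform]SourceTable.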
---
 bps/clustering/DRP-recalibrated.yaml          |  4 +-
 bps/resources/HSC/DRP-RC2.yaml                |  2 -
 pipelines/HSC/DRP-Prod.yaml                   | 33 ++++------
 pipelines/HSC/DRP-RC2.yaml                    | 66 ++++++++-----------
 pipelines/_ingredients/LSSTCam-imSim/DRP.yaml | 41 +++++-------
 tests/test_pipelines.py                       |  4 +-
 6 files changed, 60 insertions(+), 90 deletions(-)

diff --git a/bps/clustering/DRP-recalibrated.yaml b/bps/clustering/DRP-recalibrated.yaml
index 6f1b7913..d0c81ecd 100644
--- a/bps/clustering/DRP-recalibrated.yaml
+++ b/bps/clustering/DRP-recalibrated.yaml
@@ -36,8 +36,8 @@ cluster:
     pipetasks: assembleCoadd,inject_coadd,templateGen,detection
     dimensions: tract,patch,band
 
-  sourceTable:
-    pipetasks: writeRecalibratedSourceTable,transformSourceTable
+  finalizeSourceTable:
+    pipetasks: finalizeCharacterization,updateVisitSummary,writeRecalibratedSourceTable,transformSourceTable,consolidateSourceTable
     dimensions: visit
     equalDimensions: visit:exposure
 
diff --git a/bps/resources/HSC/DRP-RC2.yaml b/bps/resources/HSC/DRP-RC2.yaml
index cc331bbc..ddb4b991 100644
--- a/bps/resources/HSC/DRP-RC2.yaml
+++ b/bps/resources/HSC/DRP-RC2.yaml
@@ -64,8 +64,6 @@ pipetask:
     requestMemory: 10000
   detection:
     requestMemory: 16384
-
-  # step8 tasks - sasquatch
   analyzeObjectTableCore:
     requestMemory: 16000
   analyzeMatchedVisitCore:
diff --git a/pipelines/HSC/DRP-Prod.yaml b/pipelines/HSC/DRP-Prod.yaml
index a06c987a..96ec8278 100644
--- a/pipelines/HSC/DRP-Prod.yaml
+++ b/pipelines/HSC/DRP-Prod.yaml
@@ -24,6 +24,7 @@ subsets:
       - calibrate
       - writePreSourceTable
       - transformPreSourceTable
+      - analyzeAmpOffsetMetadata
     description: |
       Per-detector tasks that can be run together to start the DRP pipeline.
 
@@ -99,6 +100,7 @@ subsets:
       and CcdVisits.
   step3:
     subset:
+      # per-patch tasks
       - makeDirectWarp
       - makePsfMatchedWarp
       - selectDeepCoaddVisits
@@ -111,8 +113,18 @@ subsets:
       - forcedPhotCoadd
       - writeObjectTable
       - transformObjectTable
+      # per-tract tasks
       - consolidateObjectTable
       - healSparsePropertyMaps
+      - analyzeObjectTableCore
+      - catalogMatchTract
+      - refCatObjectTract
+      - validateObjectTableCore
+      - analyzeMatchedVisitCore
+      - photometricCatalogMatch
+      - photometricRefCatObjectTract
+      - plotPropertyMapTract
+
     description: |
       tract-level tasks.  Allowed data query constraints: tract
 
@@ -134,6 +146,7 @@ subsets:
   step7:
     subset:
       - consolidateHealSparsePropertyMaps
+      - analyzeObjectTableSurveyCore
     description: |
       Tasks that should be run as the final step that require global inputs,
       and can be run after the 'step3' subset.
@@ -141,23 +154,3 @@ subsets:
       This step has global aggregation tasks to run over all visits, detectors,
       tracts, etc.  This step should be run only with the instrument constraint
       in the data query.
-  step8:
-    subset:
-      - analyzeAmpOffsetMetadata
-      - analyzeObjectTableCore
-      - analyzeObjectTableSurveyCore
-      - catalogMatchTract
-      - refCatObjectTract
-      - validateObjectTableCore
-      - analyzeMatchedVisitCore
-      - photometricCatalogMatch
-      - photometricRefCatObjectTract
-      - plotPropertyMapTract
-    description: |
-      Core (most important), coadd-level plots/metrics from analysis_tools.
-
-      Must be run after 'step3' (writeObjectTable). Cannot be run with any
-      dataquery constraints if analyzeObjectTableSurveyCore is included.
-
-      Can be run together with analysis_drp subsets, e.g. pipeline.yaml
-      (hash/pound sign)step8,analysis_coadd_plots,imsim_analysis_coadd_plots
diff --git a/pipelines/HSC/DRP-RC2.yaml b/pipelines/HSC/DRP-RC2.yaml
index 4284faf9..b1e1ab5e 100644
--- a/pipelines/HSC/DRP-RC2.yaml
+++ b/pipelines/HSC/DRP-RC2.yaml
@@ -108,6 +108,7 @@ subsets:
       - calibrate
       - writePreSourceTable
       - transformPreSourceTable
+      - analyzeAmpOffsetMetadata
     description: |
       Per-detector tasks that can be run together to start the DRP pipeline.
 
@@ -156,11 +157,17 @@ subsets:
       - fgcmFitCycle
       - fgcmOutputProducts
       - updateVisitSummary
+      - writeRecalibratedSourceTable
+      - transformSourceTable
+      - consolidateSourceTable
+      - catalogMatchVisit
+      - astrometricRefCatSourceVisit
       - makeCcdVisitTable
       - makeVisitTable
     description: |
-      Per-visit and per-collection tasks that can be run together after step2b
-      with no data query constraints other than instrument.
+      Per-detector, per-visit, and per-collection tasks that can be run
+      together after step2b with no data query constraints other than
+      instrument.
 
       FGCM requires full visits and 'tract' and 'patch' constraints will
       always select partial visits that overlap that region.
@@ -172,6 +179,7 @@ subsets:
       wrong (partial-visit) inputs to its 'background' connection.
   step3:
     subset:
+      # per-patch Tasks
       - makeDirectWarp
       - makePsfMatchedWarp
       - selectDeepCoaddVisits
@@ -184,10 +192,20 @@ subsets:
       - forcedPhotCoadd
       - transformObjectTable
       - writeObjectTable
-      - consolidateObjectTable
       - healSparsePropertyMaps
       - selectGoodSeeingVisits
       - templateGen
+      # per-tract Tasks
+      - consolidateObjectTable
+      - analyzeMatchedVisitCore
+      - analyzeMatchedPreVisitCore
+      - analyzeObjectTableCore
+      - catalogMatchTract
+      - photometricCatalogMatch
+      - photometricRefCatObjectTract
+      - plotPropertyMapTract
+      - refCatObjectTract
+      - validateObjectTableCore
     description: |
       Tasks that can be run together, but only after the 'step1' and 'step2'
       subsets.
@@ -207,8 +225,6 @@ subsets:
       - detectAndMeasureDiaSources
       - transformDiaSourceCat
       - writeForcedSourceTable
-      - writeRecalibratedSourceTable
-      - transformSourceTable
     description: |
       Tasks that can be run together, but only after the 'step1', 'step2' and
       'step3' subsets
@@ -241,9 +257,6 @@ subsets:
   step6:
     subset:
       - consolidateDiaSourceTable
-      - consolidateSourceTable
-      - catalogMatchVisit
-      - astrometricRefCatSourceVisit
       - sourceObjectMatch
     description: |
       Tasks that can be run together, but only after the 'step1', 'step2',
@@ -262,6 +275,13 @@ subsets:
   step7:
     subset:
       - consolidateHealSparsePropertyMaps
+      - matchedVisitCoreWholeSkyPlot
+      - objectTableCoreWholeSkyPlot
+      - makeMetricTableMatchedVisitCore
+      - makeMetricTableObjectTableCore
+      - makeMetricTableObjectTableCoreRefCatMatch
+      - objectTableCoreRefCatMatchWholeSkyPlot
+      - analyzeObjectTableSurveyCore
     description: |
       Tasks that should be run as the final step that require global inputs,
       and can be run after the 'step3' subset.
@@ -269,33 +289,3 @@ subsets:
       This step has global aggregation tasks to run over all visits, detectors,
       tracts, etc.  This step should be run only with the instrument constraint
       in the data query.
-  step8:
-    subset:
-      - analyzeAmpOffsetMetadata
-      - analyzeMatchedVisitCore
-      - analyzeMatchedPreVisitCore
-      - analyzeObjectTableCore
-      - analyzeObjectTableSurveyCore
-      - catalogMatchTract
-      - makeMetricTableMatchedVisitCore
-      - makeMetricTableObjectTableCore
-      - makeMetricTableObjectTableCoreRefCatMatch
-      - matchedVisitCoreWholeSkyPlot
-      - objectTableCoreWholeSkyPlot
-      - objectTableCoreRefCatMatchWholeSkyPlot
-      - photometricCatalogMatch
-      - photometricRefCatObjectTract
-      - plotPropertyMapTract
-      - refCatObjectTract
-      - validateObjectTableCore
-    description: |
-      Core (most important), coadd-level plots/metrics from analysis_tools.
-
-      Must be run after 'step3' (writeObjectTable). If the task
-      analyzeObjectTableSurveyCore is included, should be run with either no
-      data query constraints or with a tract constraint that *exactly* matches
-      that from 'step3'. Running with only a subset of tracts from step3 will
-      result in incomplete survey-level outputs.
-
-      Can be run together with other analysis_drp/analysis_tools subsets, e.g.
-      'pipeline.yaml#step8,analysis_coadd_plots,imsim_analysis_coadd_plots'
diff --git a/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml b/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml
index 6b0e884e..1bc009b4 100644
--- a/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml
+++ b/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml
@@ -125,6 +125,7 @@ subsets:
       - calibrate
       - writeSourceTable
       - transformSourceTable
+      - analyzeAmpOffsetMetadata
     description: |
       Per-detector tasks that can be run together to start the DRP pipeline.
 
@@ -144,6 +145,8 @@ subsets:
       - makeCcdVisitTable
       - makeVisitTable
       - updateVisitSummary
+      - catalogMatchVisit
+      - astrometricRefCatSourceVisit
     description: |
       Tasks that can be run together, but only after the 'step1'.
 
@@ -161,6 +164,7 @@ subsets:
       Full collection-level tasks include: makeCcdVisitTable, makeVisitTable
   step3:
     subset:
+      # Per-patch Tasks
       - makeDirectWarp
       - makePsfMatchedWarp
       - selectDeepCoaddVisits
@@ -173,12 +177,20 @@ subsets:
       - forcedPhotCoadd
       - transformObjectTable
       - writeObjectTable
-      - consolidateObjectTable
-      - healSparsePropertyMaps
       - selectGoodSeeingVisits
       - templateGen
+      # Per-tract Tasks
+      - consolidateObjectTable
+      - healSparsePropertyMaps
       - matchObjectToTruth
       - compareObjectToTruth
+      - analyzeMatchedVisitCore
+      - analyzeObjectTableCore
+      - catalogMatchTract
+      - diff_matched_analysis
+      - refCatObjectTract
+      - validateObjectTableCore
+
     description: |
       Tasks that can be run together, but only after the 'step1' and 'step2'
       subsets.
@@ -236,8 +248,6 @@ subsets:
   step6:
     subset:
       - consolidateDiaSourceTable
-      - catalogMatchVisit
-      - astrometricRefCatSourceVisit
       - sourceObjectMatch
     description: |
       Tasks that can be run together, but only after the 'step1', 'step2',
@@ -255,6 +265,7 @@ subsets:
   step7:
     subset:
       - consolidateHealSparsePropertyMaps
+      - analyzeObjectTableSurveyCore
     description: |
       Tasks that should be run as the final step that require global inputs,
       and can be run after the 'step3' subset.
@@ -262,28 +273,6 @@ subsets:
       This step has global aggregation tasks to run over all visits, detectors,
       tracts, etc.  This step should be run only with the instrument constraint
       in the data query.
-  step8:
-    subset:
-      - analyzeAmpOffsetMetadata
-      - analyzeMatchedVisitCore
-      - analyzeObjectTableCore
-      - analyzeObjectTableSurveyCore
-      - catalogMatchTract
-      - diff_matched_analysis
-      - refCatObjectTract
-      - validateObjectTableCore
-
-    description: |
-      Core (most important), coadd-level plots/metrics from analysis_tools.
-
-      Must be run after 'step3' (writeObjectTable). Cannot be run with any
-      dataquery constraints if analyzeObjectTableSurveyCore is included.
-
-      Can be run together with analysis_drp subsets, e.g. pipeline.yaml
-      (hash/pound sign)step8,analysis_coadd_plots,imsim_analysis_coadd_plots
-
-      Includes extended-level diff_matched_analysis metrics, which are
-      considered core for DC2 as they can't be run on other datasets yet.
   faro_visit:
     subset:
       - nsrcMeasVisit
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index d3cd6470..bc1b3cad 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -524,7 +524,7 @@ def test_lsstcam_imsim_drp_ci_imsim(self):
         butler = self.makeButler(writeable=True)
         tester = PipelineStepTester(
             os.path.join(PIPELINES_DIR, "LSSTCam-imSim", "DRP-ci_imsim.yaml"),
-            [f"#step{N}" for N in range(1, 9)],
+            [f"#step{N}" for N in range(1, 8)],
             [
                 ("cal_ref_cat_2_2", {"htm7"}, "SimpleCatalog", False),
             ],
@@ -537,7 +537,7 @@ def test_lsstcam_imsim_drp_test_med_1(self):
         butler = self.makeButler(writeable=True)
         tester = PipelineStepTester(
             os.path.join(PIPELINES_DIR, "LSSTCam-imSim", "DRP-test-med-1.yaml"),
-            [f"#step{N}" for N in range(1, 9)],
+            [f"#step{N}" for N in range(1, 8)],
             [
                 ("cal_ref_cat_2_2", {"htm7"}, "SimpleCatalog", False),
             ],