From 9eda74be8dffd8fc7698fa073734cbebc1394187 Mon Sep 17 00:00:00 2001 From: Yusra AlSayyad Date: Mon, 1 Jul 2024 14:54:18 -0700 Subject: [PATCH] Remove step8 and move its analysis tasks upstream Now that we're running RC2/DC2s with the sasquatch butler for all steps, we can move the analysis tasks to the step which creates their input data products. Move source table consolidation to the global step2 stage, and add consolidateSourceTable to the per-visit sourceTable cluster for [writeRecalibrated|transform|consolidate]SourceTable. --- bps/clustering/DRP-recalibrated.yaml | 4 +- bps/resources/HSC/DRP-RC2.yaml | 2 - pipelines/HSC/DRP-Prod.yaml | 33 ++++------ pipelines/HSC/DRP-RC2.yaml | 66 ++++++++----------- pipelines/_ingredients/LSSTCam-imSim/DRP.yaml | 41 +++++------- tests/test_pipelines.py | 4 +- 6 files changed, 60 insertions(+), 90 deletions(-) diff --git a/bps/clustering/DRP-recalibrated.yaml b/bps/clustering/DRP-recalibrated.yaml index 6f1b7913..d0c81ecd 100644 --- a/bps/clustering/DRP-recalibrated.yaml +++ b/bps/clustering/DRP-recalibrated.yaml @@ -36,8 +36,8 @@ cluster: pipetasks: assembleCoadd,inject_coadd,templateGen,detection dimensions: tract,patch,band - sourceTable: - pipetasks: writeRecalibratedSourceTable,transformSourceTable + finalizeSourceTable: + pipetasks: finalizeCharacterization,updateVisitSummary,writeRecalibratedSourceTable,transformSourceTable,consolidateSourceTable dimensions: visit equalDimensions: visit:exposure diff --git a/bps/resources/HSC/DRP-RC2.yaml b/bps/resources/HSC/DRP-RC2.yaml index cc331bbc..ddb4b991 100644 --- a/bps/resources/HSC/DRP-RC2.yaml +++ b/bps/resources/HSC/DRP-RC2.yaml @@ -64,8 +64,6 @@ pipetask: requestMemory: 10000 detection: requestMemory: 16384 - - # step8 tasks - sasquatch analyzeObjectTableCore: requestMemory: 16000 analyzeMatchedVisitCore: diff --git a/pipelines/HSC/DRP-Prod.yaml b/pipelines/HSC/DRP-Prod.yaml index a06c987a..96ec8278 100644 --- a/pipelines/HSC/DRP-Prod.yaml +++ b/pipelines/HSC/DRP-Prod.yaml @@ -24,6 +24,7 @@ subsets: - calibrate - writePreSourceTable - transformPreSourceTable + - analyzeAmpOffsetMetadata description: | Per-detector tasks that can be run together to start the DRP pipeline. @@ -99,6 +100,7 @@ subsets: and CcdVisits. step3: subset: + # per-patch tasks - makeDirectWarp - makePsfMatchedWarp - selectDeepCoaddVisits @@ -111,8 +113,18 @@ subsets: - forcedPhotCoadd - writeObjectTable - transformObjectTable + # per tract tasks - consolidateObjectTable - healSparsePropertyMaps + - analyzeObjectTableCore + - catalogMatchTract + - refCatObjectTract + - validateObjectTableCore + - analyzeMatchedVisitCore + - photometricCatalogMatch + - photometricRefCatObjectTract + - plotPropertyMapTract + description: | tract-level tasks. Allowed data query constraints: tract @@ -134,6 +146,7 @@ subsets: step7: subset: - consolidateHealSparsePropertyMaps + - analyzeObjectTableSurveyCore description: | Tasks that should be run as the final step that require global inputs, and can be run after the 'step3' subset. @@ -141,23 +154,3 @@ subsets: This step has global aggregation tasks to run over all visits, detectors, tracts, etc. This step should be run only with the instrument constraint in the data query. - step8: - subset: - - analyzeAmpOffsetMetadata - - analyzeObjectTableCore - - analyzeObjectTableSurveyCore - - catalogMatchTract - - refCatObjectTract - - validateObjectTableCore - - analyzeMatchedVisitCore - - photometricCatalogMatch - - photometricRefCatObjectTract - - plotPropertyMapTract - description: | - Core (most important), coadd-level plots/metrics from analysis_tools. - - Must be run after 'step3' (writeObjectTable). Cannot be run with any - dataquery constraints if analyzeObjectTableSurveyCore is included. - - Can be run together with analysis_drp subsets, e.g. pipeline.yaml - (hash/pound sign)step8,analysis_coadd_plots,imsim_analysis_coadd_plots diff --git a/pipelines/HSC/DRP-RC2.yaml b/pipelines/HSC/DRP-RC2.yaml index 4284faf9..b1e1ab5e 100644 --- a/pipelines/HSC/DRP-RC2.yaml +++ b/pipelines/HSC/DRP-RC2.yaml @@ -108,6 +108,7 @@ subsets: - calibrate - writePreSourceTable - transformPreSourceTable + - analyzeAmpOffsetMetadata description: | Per-detector tasks that can be run together to start the DRP pipeline. @@ -156,11 +157,17 @@ subsets: - fgcmFitCycle - fgcmOutputProducts - updateVisitSummary + - writeRecalibratedSourceTable + - transformSourceTable + - consolidateSourceTable + - catalogMatchVisit + - astrometricRefCatSourceVisit - makeCcdVisitTable - makeVisitTable description: | - Per-visit and per-collection tasks that can be run together after step2b - with no data query constraints other than instrument. + Per-detector, Per-visit, and per-collection tasks that can be run + together after step2b with no data query constraints other than + instrument. FGCM requires full visits and 'tract' and 'patch' constraints will always select partial visits that overlap that region. @@ -172,6 +179,7 @@ subsets: wrong (partial-visit) inputs to its 'background' connection. step3: subset: + # per-patch Tasks - makeDirectWarp - makePsfMatchedWarp - selectDeepCoaddVisits @@ -184,10 +192,20 @@ subsets: - forcedPhotCoadd - transformObjectTable - writeObjectTable - - consolidateObjectTable - healSparsePropertyMaps - selectGoodSeeingVisits - templateGen + # per-tract Tasks + - consolidateObjectTable + - analyzeMatchedVisitCore + - analyzeMatchedPreVisitCore + - analyzeObjectTableCore + - catalogMatchTract + - photometricCatalogMatch + - photometricRefCatObjectTract + - plotPropertyMapTract + - refCatObjectTract + - validateObjectTableCore description: | Tasks that can be run together, but only after the 'step1' and 'step2' subsets. @@ -207,8 +225,6 @@ subsets: - detectAndMeasureDiaSources - transformDiaSourceCat - writeForcedSourceTable - - writeRecalibratedSourceTable - - transformSourceTable description: | Tasks that can be run together, but only after the 'step1', 'step2' and 'step3' subsets @@ -241,9 +257,6 @@ subsets: step6: subset: - consolidateDiaSourceTable - - consolidateSourceTable - - catalogMatchVisit - - astrometricRefCatSourceVisit - sourceObjectMatch description: | Tasks that can be run together, but only after the 'step1', 'step2', @@ -262,6 +275,13 @@ subsets: step7: subset: - consolidateHealSparsePropertyMaps + - matchedVisitCoreWholeSkyPlot + - objectTableCoreWholeSkyPlot + - makeMetricTableMatchedVisitCore + - makeMetricTableObjectTableCore + - makeMetricTableObjectTableCoreRefCatMatch + - objectTableCoreRefCatMatchWholeSkyPlot + - analyzeObjectTableSurveyCore description: | Tasks that should be run as the final step that require global inputs, and can be run after the 'step3' subset. @@ -269,33 +289,3 @@ subsets: This step has global aggregation tasks to run over all visits, detectors, tracts, etc. This step should be run only with the instrument constraint in the data query. - step8: - subset: - - analyzeAmpOffsetMetadata - - analyzeMatchedVisitCore - - analyzeMatchedPreVisitCore - - analyzeObjectTableCore - - analyzeObjectTableSurveyCore - - catalogMatchTract - - makeMetricTableMatchedVisitCore - - makeMetricTableObjectTableCore - - makeMetricTableObjectTableCoreRefCatMatch - - matchedVisitCoreWholeSkyPlot - - objectTableCoreWholeSkyPlot - - objectTableCoreRefCatMatchWholeSkyPlot - - photometricCatalogMatch - - photometricRefCatObjectTract - - plotPropertyMapTract - - refCatObjectTract - - validateObjectTableCore - description: | - Core (most important), coadd-level plots/metrics from analysis_tools. - - Must be run after 'step3' (writeObjectTable). If the task - analyzeObjectTableSurveyCore is included, should be run with either no - data query constraints or with a tract constraint that *exactly* matches - that from 'step3'. Running with only a subset of tracts from step3 will - result in incomplete survey-level outputs. - - Can be run together with other analysis_drp/analysis_tools subsets, e.g. - 'pipeline.yaml#step8,analysis_coadd_plots,imsim_analysis_coadd_plots' diff --git a/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml b/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml index 6b0e884e..1bc009b4 100644 --- a/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml +++ b/pipelines/_ingredients/LSSTCam-imSim/DRP.yaml @@ -125,6 +125,7 @@ subsets: - calibrate - writeSourceTable - transformSourceTable + - analyzeAmpOffsetMetadata description: | Per-detector tasks that can be run together to start the DRP pipeline. @@ -144,6 +145,8 @@ subsets: - makeCcdVisitTable - makeVisitTable - updateVisitSummary + - catalogMatchVisit + - astrometricRefCatSourceVisit description: | Tasks that can be run together, but only after the 'step1'. @@ -161,6 +164,7 @@ subsets: Full collection-level tasks include: makeCcdVisitTable, makeVisitTable step3: subset: + # Per-patch Tasks - makeDirectWarp - makePsfMatchedWarp - selectDeepCoaddVisits @@ -173,12 +177,20 @@ subsets: - forcedPhotCoadd - transformObjectTable - writeObjectTable - - consolidateObjectTable - - healSparsePropertyMaps - selectGoodSeeingVisits - templateGen + # Per-tract Tasks + - consolidateObjectTable + - healSparsePropertyMaps - matchObjectToTruth - compareObjectToTruth + - analyzeMatchedVisitCore + - analyzeObjectTableCore + - catalogMatchTract + - diff_matched_analysis + - refCatObjectTract + - validateObjectTableCore + description: | Tasks that can be run together, but only after the 'step1' and 'step2' subsets. @@ -236,8 +248,6 @@ subsets: step6: subset: - consolidateDiaSourceTable - - catalogMatchVisit - - astrometricRefCatSourceVisit - sourceObjectMatch description: | Tasks that can be run together, but only after the 'step1', 'step2', @@ -255,6 +265,7 @@ subsets: step7: subset: - consolidateHealSparsePropertyMaps + - analyzeObjectTableSurveyCore description: | Tasks that should be run as the final step that require global inputs, and can be run after the 'step3' subset. @@ -262,28 +273,6 @@ subsets: This step has global aggregation tasks to run over all visits, detectors, tracts, etc. This step should be run only with the instrument constraint in the data query. - step8: - subset: - - analyzeAmpOffsetMetadata - - analyzeMatchedVisitCore - - analyzeObjectTableCore - - analyzeObjectTableSurveyCore - - catalogMatchTract - - diff_matched_analysis - - refCatObjectTract - - validateObjectTableCore - - description: | - Core (most important), coadd-level plots/metrics from analysis_tools. - - Must be run after 'step3' (writeObjectTable). Cannot be run with any - dataquery constraints if analyzeObjectTableSurveyCore is included. - - Can be run together with analysis_drp subsets, e.g. pipeline.yaml - (hash/pound sign)step8,analysis_coadd_plots,imsim_analysis_coadd_plots - - Includes extended-level diff_matched_analysis metrics, which are - considered core for DC2 as they can't be run on other datasets yet. faro_visit: subset: - nsrcMeasVisit diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index d3cd6470..bc1b3cad 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -524,7 +524,7 @@ def test_lsstcam_imsim_drp_ci_imsim(self): butler = self.makeButler(writeable=True) tester = PipelineStepTester( os.path.join(PIPELINES_DIR, "LSSTCam-imSim", "DRP-ci_imsim.yaml"), - [f"#step{N}" for N in range(1, 9)], + [f"#step{N}" for N in range(1, 8)], [ ("cal_ref_cat_2_2", {"htm7"}, "SimpleCatalog", False), ], @@ -537,7 +537,7 @@ def test_lsstcam_imsim_drp_test_med_1(self): butler = self.makeButler(writeable=True) tester = PipelineStepTester( os.path.join(PIPELINES_DIR, "LSSTCam-imSim", "DRP-test-med-1.yaml"), - [f"#step{N}" for N in range(1, 9)], + [f"#step{N}" for N in range(1, 8)], [ ("cal_ref_cat_2_2", {"htm7"}, "SimpleCatalog", False), ],