From 5d9374bcf15ba5ab749981b85a72792b7abd803d Mon Sep 17 00:00:00 2001
From: akremin
Date: Tue, 27 Aug 2024 14:10:19 -0700
Subject: [PATCH 1/6] More concise logging of proc tables read in for caches

---
 py/desispec/workflow/proctable.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/py/desispec/workflow/proctable.py b/py/desispec/workflow/proctable.py
index 1ff121677..5346e2210 100644
--- a/py/desispec/workflow/proctable.py
+++ b/py/desispec/workflow/proctable.py
@@ -463,10 +463,21 @@ def read_minimal_tilenight_proctab_cols(nights=None, tileids=None,
     ## Load each relevant processing table file, subselect valid tilenight's and
     ## append to the full set
     ptab_files = sorted(ptab_files)
+    ## Less intrusive logging of files we're reading in
+    if len(ptab_files) > 0:
+        dirname = os.path.dirname(ptab_files[0])
+        shortnames = [fil.replace(dirname+"/", '') for fil in ptab_files]
+    else:
+        dirname = ''
+        shortnames = []
+    log.info(f"Loading the following processing tables for "
+             + f"tilenight processing table cache from directory: {dirname}, filenames: {shortnames}")
+
     ptables = list()
     for ptab_file in ptab_files:
         ## correct way but slower and we don't need multivalue columns
-        t = load_table(tablename=ptab_file, tabletype='proctable')
+        t = load_table(tablename=ptab_file, tabletype='proctable',
+                       suppress_logging=True)
         t = _select_tilenights_from_ptab(t)

         ## Need to ensure that the string columns are consistent
@@ -626,10 +637,21 @@ def read_minimal_full_proctab_cols(nights=None, tileids=None,
     ## Load each relevant processing table file, subselect valid tilenight's and
     ## append to the full set
     ptab_files = sorted(ptab_files)
+    ## Less intrusive logging of files we're reading in
+    if len(ptab_files) > 0:
+        dirname = os.path.dirname(ptab_files[0])
+        shortnames = [fil.replace(dirname+"/", '') for fil in ptab_files]
+    else:
+        dirname = ''
+        shortnames = []
+    log.info(f"Loading the following processing tables for "
+             + f"tilenight processing table cache from directory: {dirname}, filenames: {shortnames}")
+
     ptables = list()
     for ptab_file in ptab_files:
         ## correct way but slower and we don't need multivalue columns
-        t = load_table(tablename=ptab_file, tabletype='proctable')
+        t = load_table(tablename=ptab_file, tabletype='proctable',
+                       suppress_logging=True)

         ## Need to ensure that the string columns are consistent
         for col in ['PROCCAMWORD']:

From 00ac889b5812afd3fe2099579199314eec542808 Mon Sep 17 00:00:00 2001
From: akremin
Date: Tue, 27 Aug 2024 14:11:43 -0700
Subject: [PATCH 2/6] only check queue in submit_prod if proc table does not exist

---
 py/desispec/scripts/submit_prod.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/py/desispec/scripts/submit_prod.py b/py/desispec/scripts/submit_prod.py
index d5be16673..3302bee58 100644
--- a/py/desispec/scripts/submit_prod.py
+++ b/py/desispec/scripts/submit_prod.py
@@ -245,17 +245,22 @@ def submit_production(production_yaml, queue_threshold=4500, dry_run_level=False
     all_nights = sorted(all_nights)
     log.info(f"Processing {all_nights=}")
     for night in sorted(all_nights):
+        ## If proctable exists, assume we've already completed that night
+        if os.path.exists(findfile('proctable', night=night, readonly=True)):
+            skipped_nights.append(night)
+            log.info(f"{night=} already has a proctable, skipping.")
+            continue
+
+        ## If the queue is too full, stop submitting nights
         num_in_queue = check_queue_count(user=user, include_scron=False,
                                          dry_run_level=dry_run_level)
-        ## In Jura the largest night had 115 jobs, to be conservative say 200 by default
+        ## In Jura the largest night had 115 jobs, to be conservative we submit
+        ## up to 4500 jobs (out of a 5000 limit) by default
         if num_in_queue > queue_threshold:
             log.info(f"{num_in_queue} jobs in the queue > {queue_threshold},"
                      + " so stopping the job submissions.")
             break
-        if os.path.exists(findfile('proctable', night=night, readonly=True)):
-            skipped_nights.append(night)
-            log.info(f"{night=} already has a proctable, skipping.")
-            continue
+
         ## We don't expect exposure tables to change during code execution here
         ## but we do expect processing tables to evolve, so clear that cache
         log.info(f"Processing {night=}")

From 15946ddc0a08ef9d49c603e1052bc55526fb0c2e Mon Sep 17 00:00:00 2001
From: akremin
Date: Tue, 27 Aug 2024 14:13:29 -0700
Subject: [PATCH 3/6] string alignment in caching print message

---
 py/desispec/workflow/proctable.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/py/desispec/workflow/proctable.py b/py/desispec/workflow/proctable.py
index 5346e2210..beaaa24d0 100644
--- a/py/desispec/workflow/proctable.py
+++ b/py/desispec/workflow/proctable.py
@@ -470,8 +470,8 @@ def read_minimal_tilenight_proctab_cols(nights=None, tileids=None,
     else:
         dirname = ''
         shortnames = []
-    log.info(f"Loading the following processing tables for "
-             + f"tilenight processing table cache from directory: {dirname}, filenames: {shortnames}")
+    log.info(f"Loading the following processing tables for tilenight processing"
+             + f" table cache from directory: {dirname}, filenames: {shortnames}")

     ptables = list()
     for ptab_file in ptab_files:
@@ -644,8 +644,8 @@ def read_minimal_full_proctab_cols(nights=None, tileids=None,
     else:
         dirname = ''
         shortnames = []
-    log.info(f"Loading the following processing tables for "
-             + f"tilenight processing table cache from directory: {dirname}, filenames: {shortnames}")
+    log.info(f"Loading the following processing tables for full processing "
+             + f"table cache from directory: {dirname}, filenames: {shortnames}")

     ptables = list()
     for ptab_file in ptab_files:

From fefaec081551c00440424dd5ce197bff8c0a5629 Mon Sep 17 00:00:00 2001
From: akremin
Date: Tue, 27 Aug 2024 14:24:46 -0700
Subject: [PATCH 4/6] incomplete set of cals is now a log WARNING rather than an ERROR

---
 py/desispec/workflow/calibration_selection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/py/desispec/workflow/calibration_selection.py b/py/desispec/workflow/calibration_selection.py
index d802b79f0..e52edbce3 100644
--- a/py/desispec/workflow/calibration_selection.py
+++ b/py/desispec/workflow/calibration_selection.py
@@ -110,7 +110,7 @@ def determine_calibrations_to_proc(etable, do_cte_flats=True,
                       + f"more information is known.")
             return etable[[]]
         else:
-            log.error(f"Only found {Counter(exptypes)} calibrations "
+            log.warning(f"Only found {Counter(exptypes)} calibrations "
                       + "and not acquiring new data, so this may be fatal "
                       + "if you aren't using an override file.")

From edc329972b875313238fc010db84cc6bc4a695b2 Mon Sep 17 00:00:00 2001
From: akremin
Date: Tue, 27 Aug 2024 14:35:27 -0700
Subject: [PATCH 5/6] fix proc_night to use existing proc rows in cal joint fits too

---
 py/desispec/scripts/proc_night.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/py/desispec/scripts/proc_night.py b/py/desispec/scripts/proc_night.py
index f5191b8a5..9c7602575 100644
--- a/py/desispec/scripts/proc_night.py
+++ b/py/desispec/scripts/proc_night.py
@@ -756,6 +756,10 @@ def submit_calibrations(cal_etable, ptable, cal_override, calibjobs, int_id,
     arc_prows = []
     for arc_erow in arcs:
         if arc_erow['EXPID'] in processed_cal_expids:
+            matches = np.where(ptable['EXPID'] == arc_erow['EXPID'])[0]
+            if len(matches) == 1:
+                prow = ptable[matches[0]]
+                arc_prows.append(prow)
             continue
         prow, int_id = make_exposure_prow(arc_erow, int_id, calibjobs)
         prow, ptable = create_submit_add_and_save(prow, ptable)
@@ -775,6 +779,10 @@ def submit_calibrations(cal_etable, ptable, cal_override, calibjobs, int_id,
     flat_prows = []
     for flat_erow in flats:
         if flat_erow['EXPID'] in processed_cal_expids:
+            matches = np.where(ptable['EXPID'] == flat_erow['EXPID'])[0]
+            if len(matches) == 1:
+                prow = ptable[matches[0]]
+                flat_prows.append(prow)
             continue

         jobdesc = 'flat'

From c38e46f71094f37f849a55835c19e898ee352717 Mon Sep 17 00:00:00 2001
From: kremin
Date: Tue, 27 Aug 2024 15:30:27 -0700
Subject: [PATCH 6/6] fix expid matching for cal prow fix

---
 py/desispec/scripts/proc_night.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/py/desispec/scripts/proc_night.py b/py/desispec/scripts/proc_night.py
index 9c7602575..cc73e56e0 100644
--- a/py/desispec/scripts/proc_night.py
+++ b/py/desispec/scripts/proc_night.py
@@ -756,9 +756,12 @@ def submit_calibrations(cal_etable, ptable, cal_override, calibjobs, int_id,
     arc_prows = []
     for arc_erow in arcs:
         if arc_erow['EXPID'] in processed_cal_expids:
-            matches = np.where(ptable['EXPID'] == arc_erow['EXPID'])[0]
+            matches = np.where([arc_erow['EXPID'] in itterprow['EXPID']
+                                for itterprow in ptable])[0]
             if len(matches) == 1:
                 prow = ptable[matches[0]]
+                log.info("Found existing arc prow in ptable, "
+                         + f"including it for psfnight job: {list(prow)}")
                 arc_prows.append(prow)
             continue
         prow, int_id = make_exposure_prow(arc_erow, int_id, calibjobs)
@@ -779,9 +782,12 @@ def submit_calibrations(cal_etable, ptable, cal_override, calibjobs, int_id,
     flat_prows = []
     for flat_erow in flats:
         if flat_erow['EXPID'] in processed_cal_expids:
-            matches = np.where(ptable['EXPID'] == flat_erow['EXPID'])[0]
+            matches = np.where([flat_erow['EXPID'] in itterprow['EXPID']
+                                for itterprow in ptable])[0]
             if len(matches) == 1:
                 prow = ptable[matches[0]]
+                log.info("Found existing flat prow in ptable, "
+                         + f"including it for nightlyflat job: {list(prow)}")
                 flat_prows.append(prow)
             continue