diff --git a/bin/desi_reformat_exposure_tables b/bin/desi_reformat_exposure_tables deleted file mode 100755 index e651dd34a..000000000 --- a/bin/desi_reformat_exposure_tables +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -import argparse - -## Import some helper functions, you can see their definitions by uncomenting the bash shell command -from desispec.scripts.reformat_exptables import update_exposure_tables - - -def get_parser(): - """ - Creates an arguments parser for the desi_reformat_exposure_tables script - """ - parser = argparse.ArgumentParser(usage = "{prog} [options]") - parser.add_argument("-n", "--nights", type=str, default=None, help="nights as comma separated string") - parser.add_argument("--night-range", type=str, default=None, help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD"+\ - "for first_night,last_night specifying the beginning"+\ - "and end of a range of nights to be generated. "+\ - "last_night should be inclusive.") - parser.add_argument("--obstypes", type=str, default=None, help="comma separated list of exposure types to include in "+\ - "the exposure table, e.g. 
science,arc,flat,dark,zero, ...") - parser.add_argument("-i", "--path-to-data", type=str, default=None, help="path to the raw input data") - parser.add_argument("-o","--exp-table-path", type=str, default=None, help="path to save exposure tables, without monthly subdirectory") - parser.add_argument("--orig-filetype", type=str, default='csv', help="format type for original exposure tables") - parser.add_argument("--out-filetype", type=str, default='csv', help="format type for output exposure tables") - parser.add_argument("--verbose", action="store_true", help="print verbose output") - parser.add_argument("--dry-run", action="store_true", - help="Perform a dry run, printing the changes that would be made and the final output table "+ - "but not overwriting the actual files on disk.") - parser.add_argument("--no-specprod", action="store_true", help="Create exposure table in repository location "+\ - "rather than the SPECPROD location.") - return parser - - - -if __name__ == '__main__': - parser = get_parser() - args = parser.parse_args() - - update_exposure_tables(**args.__dict__) \ No newline at end of file diff --git a/bin/desi_reformat_exptables b/bin/desi_reformat_exptables new file mode 100755 index 000000000..8852bd9c1 --- /dev/null +++ b/bin/desi_reformat_exptables @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# coding: utf-8 + +## Import some helper functions, you can see their definitions by uncomenting the bash shell command +from desispec.scripts.reformat_exptables import get_parser, reformat_exposure_tables + + +if __name__ == '__main__': + parser = get_parser() + args = parser.parse_args() + + reformat_exposure_tables(**args.__dict__) \ No newline at end of file diff --git a/bin/desi_reformat_proctables b/bin/desi_reformat_proctables new file mode 100755 index 000000000..ed4e8107e --- /dev/null +++ b/bin/desi_reformat_proctables @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# coding: utf-8 + +## Import some helper functions, you can see their definitions by 
uncomenting the bash shell command +from desispec.scripts.reformat_proctables import get_parser, reformat_processing_tables + + + + +if __name__ == '__main__': + parser = get_parser() + args = parser.parse_args() + + reformat_processing_tables(**args.__dict__) \ No newline at end of file diff --git a/doc/api.rst b/doc/api.rst index a78702117..a445e652c 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -587,6 +587,9 @@ desispec API .. automodule:: desispec.scripts.reformat_exptables :members: +.. automodule:: desispec.scripts.reformat_proctables + :members: + .. automodule:: desispec.scripts.rejectcosmics :members: diff --git a/py/desispec/scripts/reformat_exptables.py b/py/desispec/scripts/reformat_exptables.py index 115bd10ac..3cc1837b5 100644 --- a/py/desispec/scripts/reformat_exptables.py +++ b/py/desispec/scripts/reformat_exptables.py @@ -1,8 +1,9 @@ """ -desispec.scripts.updateexptables -================================ +desispec.scripts.reformat_exptables +=================================== """ +import argparse import os import sys import numpy as np @@ -20,8 +21,31 @@ from desispec.scripts.exposuretable import create_exposure_tables +def get_parser(): + """ + Creates an arguments parser for the desi_reformat_exposure_tables script + """ + parser = argparse.ArgumentParser(usage = "{prog} [options]") + parser.add_argument("-n", "--nights", type=str, default=None, help="nights as comma separated string") + parser.add_argument("--night-range", type=str, default=None, help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD"+\ + "for first_night,last_night specifying the beginning"+\ + "and end of a range of nights to be generated. "+\ + "last_night should be inclusive.") + parser.add_argument("--obstypes", type=str, default=None, help="comma separated list of exposure types to include in "+\ + "the exposure table, e.g. 
science,arc,flat,dark,zero, ...") + parser.add_argument("-i", "--path-to-data", type=str, default=None, help="path to the raw input data") + parser.add_argument("-o","--exp-table-path", type=str, default=None, help="path to save exposure tables, without monthly subdirectory") + parser.add_argument("--orig-filetype", type=str, default='csv', help="format type for original exposure tables") + parser.add_argument("--out-filetype", type=str, default='csv', help="format type for output exposure tables") + parser.add_argument("--verbose", action="store_true", help="print verbose output") + parser.add_argument("--dry-run", action="store_true", + help="Perform a dry run, printing the changes that would be made and the final output table "+ + "but not overwriting the actual files on disk.") + parser.add_argument("--no-specprod", action="store_true", help="Create exposure table in repository location "+\ + "rather than the SPECPROD location.") + return parser -def update_exposure_tables(nights=None, night_range=None, path_to_data=None, +def reformat_exposure_tables(nights=None, night_range=None, path_to_data=None, exp_table_path=None, obstypes=None, orig_filetype='csv', out_filetype='csv', verbose=False, no_specprod=False, dry_run=False): diff --git a/py/desispec/scripts/reformat_proctables.py b/py/desispec/scripts/reformat_proctables.py new file mode 100644 index 000000000..20ac554cb --- /dev/null +++ b/py/desispec/scripts/reformat_proctables.py @@ -0,0 +1,194 @@ +""" +desispec.scripts.reformat_proctables +==================================== + +""" +import argparse +import os +import glob +import sys +import numpy as np +import re +import time +from astropy.table import Table + +from desispec.io.meta import findfile +from desispec.workflow.proctable import get_processing_table_column_defs +from desispec.workflow.utils import define_variable_from_environment, listpath, \ + pathjoin +from desispec.workflow.tableio import write_table, load_table +from 
desispec.scripts.exposuretable import create_exposure_tables + + +def get_parser(): + """ + Creates an arguments parser for the desi_reformat_processing_tables script + """ + parser = argparse.ArgumentParser(usage = "{prog} [options]") + parser.add_argument("-n", "--nights", type=str, default=None, help="nights as comma separated string") + parser.add_argument("--night-range", type=str, default=None, help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD"+\ + "for first_night,last_night specifying the beginning"+\ + "and end of a range of nights to be generated. "+\ + "last_night should be inclusive.") + parser.add_argument("--orig-filetype", type=str, default='csv', help="format type for original exposure tables") + parser.add_argument("--out-filetype", type=str, default='csv', help="format type for output exposure tables") + parser.add_argument("--dry-run", action="store_true", + help="Perform a dry run, printing the changes that would be made and the final output table "+ + "but not overwriting the actual files on disk.") + return parser + +def reformat_processing_tables(nights=None, night_range=None, orig_filetype='csv', + out_filetype='csv', dry_run=False): + """ + Generates updated processing tables for the nights requested. Requires + a current processing table to exist on disk. + + Args: + nights: str, int, or comma separated list. The night(s) to generate + processing tables for. + night_range: str. comma separated pair of nights in form + YYYYMMDD,YYYYMMDD for first_night,last_night + specifying the beginning and end of a range of + nights to be generated. first_night and last_night are + inclusive. + orig_filetype: str. The file extension (without the '.') of the processing + tables. + out_filetype: str. The file extension for the outputted processing tables + (without the '.'). 
+
+    Returns:
+        Nothing
+    """
+    # log = get_logger()
+    ## Make sure user specified what nights to run on
+    if nights is None and night_range is None:
+        raise ValueError("Must specify either nights or night_range."
+                         +" To process all nights give nights=all")
+
+    ## Get all nights in 2020's with data
+    proctab_template = findfile('proctable', night=99999999)
+    proctab_template = proctab_template.replace('99999999', '202[0-9][01][0-9][0-3][0-9]')
+    proctab_template = proctab_template.replace('.csv', f'.{orig_filetype}')
+    nights_with_proctables = list()
+    for ptabfn in glob.glob(proctab_template):
+        ## nights are 202YMMDD
+        ## NOTE(review): raw string added — '\d' in a plain string is an
+        ## invalid escape sequence (SyntaxWarning on modern Python)
+        matches = re.findall(r'202\d{5}', os.path.basename(ptabfn))
+        if len(matches) == 1:
+            n = int(matches[0])
+            nights_with_proctables.append(n)
+        else:
+            print(f"Couldn't parse a night from proctable file: {ptabfn}")
+
+    ## If unspecified or given "all", set nights to all nights with data
+    check_night = False
+    if nights is None or nights == 'all':
+        nights = nights_with_proctables
+        ## No need to check nights since derived from disk
+    else:
+        nights = [int(val.strip()) for val in nights.split(",")]
+        ## If nights are specified, make sure we check that there is actually data
+        check_night = True
+    nights = np.sort(nights)
+
+    ## If user specified a night range, cut nights to that range of dates
+    if night_range is not None:
+        if ',' not in night_range:
+            raise ValueError("night_range must be a comma separated pair of "
+                             + "nights in form YYYYMMDD,YYYYMMDD")
+        nightpair = night_range.split(',')
+        if len(nightpair) != 2 or not nightpair[0].isnumeric() \
+                or not nightpair[1].isnumeric():
+            raise ValueError("night_range must be a comma separated pair of "
+                             + "nights in form YYYYMMDD,YYYYMMDD")
+        first_night, last_night = nightpair
+        nights = nights[np.where(int(first_night) <= nights.astype(int))[0]]
+        nights = nights[np.where(int(last_night) >= nights.astype(int))[0]]
+
+    ## Get current set of expected columns
+    ptab_cols, ptab_dtypes, ptab_defs = 
get_processing_table_column_defs(return_default_values=True)
+    ptab_cols, ptab_dtypes = np.array(ptab_cols), np.array(ptab_dtypes)
+
+    ## Tell user the final list of nights and starting looping over them
+    print("Nights: ", nights)
+    for night in nights:
+        if check_night and night not in nights_with_proctables:
+            print(f"Night {night} doesn't have a processing table: Skipping.")
+            continue
+
+        ## If the processing table doesn't exist, skip, since we are updating
+        ## not generating.
+        orig_pathname = findfile('proctable', night=night).replace('.csv', f'.{orig_filetype}')
+        if not os.path.exists(orig_pathname):
+            print(f'Could not find processing table for night={night} at:'
+                  + f' {orig_pathname}. Skipping this night.')
+            continue
+
+        ## Load the old and new tables to compare
+        origtable = load_table(orig_pathname, tabletype='proctab')
+        curr_colnames = np.array(list(origtable.colnames))
+        expected_cols = np.isin(curr_colnames, ptab_cols)
+        found_cols = np.isin(ptab_cols, curr_colnames)
+
+        ## If everything is present, don't try to do anything
+        if np.all(expected_cols) and np.all(found_cols):
+            print(f"{orig_pathname} has all of the expected columns, not updating this table.")
+            continue
+
+        unexpected = list(curr_colnames[~expected_cols])
+        missing = list(ptab_cols[~found_cols])
+        print(f"Found the following unexpected columns: {unexpected}")
+        print(f"Found the following missing columns: {missing}")
+
+        ## Solving the only cases I'm currently aware of
+        if 'CAMWORD' in unexpected and 'PROCCAMWORD' in missing:
+            print(f"CAMWORD listed instead of PROCCAMWORD. Updating that.")
+            origtable.rename_column('CAMWORD', 'PROCCAMWORD')
+            ## BUGFIX(review): was unexpected.remove('CAWORD') — a typo.
+            ## 'CAWORD' is never in the list (the guard above checked for
+            ## 'CAMWORD'), so list.remove raised ValueError every time this
+            ## repair path was taken.
+            unexpected.remove('CAMWORD')
+            missing.remove('PROCCAMWORD')
+
+        if len(unexpected) > 0:
+            print(f"WARNING: Script detected unexpected columns. Only handle " +
+                  f"the case where 'CAMWORD' is defined instead of PROCCAMWORD. 
" + + f"The following unexpected columns will be dropped without " + + f"using the information they contain: {unexpected}.") + for colname in unexpected: + origtable.remove_column(colname) + + ## Add any missing columns + for colname in missing: + if colname not in ['BADAMPS', 'LASTSTEP', 'EXPFLAG']: + print(f"WARNING: Script didn't expect {colname} to be missing. " + + f"Replacing with default values, but this may have " + + f"downstream consequences.") + colindex = np.where(ptab_cols==colname)[0][0] + newdat = [ptab_defs[colindex]] * len(origtable) + newcol = Table.Column(name=colname, data=newdat, dtype=ptab_dtypes[colindex]) + origtable.add_column(newcol) + + ## Finally, reorder to the current column ordering + origtable = origtable[list(ptab_cols)] + + ## If just testing, print the table and a cell-by-cell equality test + ## for the scalar columns + ## If not testing, move the original table to an archived filename + ## and save the updated table to the official exptable pathname + if dry_run: + print("\n\nOutput file would have been:") + origtable.pprint_all() + else: + ftime = time.strftime("%Y%m%d_%Hh%Mm") + replaced_pathname = orig_pathname.replace(f".{orig_filetype}", + f".replaced-{ftime}.{orig_filetype}") + print(f"Moving original file from {orig_pathname} to {replaced_pathname}") + os.rename(orig_pathname,replaced_pathname) + time.sleep(0.1) + out_pathname = orig_pathname.replace(f".{orig_filetype}", f".{out_filetype}") + write_table(origtable, out_pathname) + print(f"Updated file saved to {out_pathname}. 
Original archived as {replaced_pathname}") + + print("\n\n") + + ## Flush the outputs + sys.stdout.flush() + sys.stderr.flush() + print("Processing table regenerations complete") diff --git a/py/desispec/workflow/proctable.py b/py/desispec/workflow/proctable.py index beaaa24d0..1b1f5b41b 100644 --- a/py/desispec/workflow/proctable.py +++ b/py/desispec/workflow/proctable.py @@ -91,7 +91,7 @@ def get_processing_table_column_defs(return_default_values=False, coldeflt2 = [ 'a0123456789' , 0 , -99 , '' , 'unknown', defqid ] colnames2 += [ 'SUBMIT_DATE', 'STATUS' , 'SCRIPTNAME'] - coltypes2 += [ int , 'S14' , 'S40' ] + coltypes2 += [ int , 'S14' , 'S50' ] coldeflt2 += [ -99 , 'UNSUBMITTED', '' ] colnames2 += ['INT_DEP_IDS' , 'LATEST_DEP_QID' , 'ALL_QIDS' ]