Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add script to update processing table column layout #2376

Merged
merged 4 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 0 additions & 40 deletions bin/desi_reformat_exposure_tables

This file was deleted.

12 changes: 12 additions & 0 deletions bin/desi_reformat_exptables
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env python
# coding: utf-8

## Import some helper functions, you can see their definitions by uncomenting the bash shell command
from desispec.scripts.reformat_exptables import get_parser, reformat_exposure_tables


if __name__ == '__main__':
parser = get_parser()
args = parser.parse_args()

reformat_exposure_tables(**args.__dict__)
14 changes: 14 additions & 0 deletions bin/desi_reformat_proctables
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env python
# coding: utf-8

## Import some helper functions, you can see their definitions by uncomenting the bash shell command
from desispec.scripts.reformat_proctables import get_parser, reformat_processing_tables




if __name__ == '__main__':
parser = get_parser()
args = parser.parse_args()

reformat_processing_tables(**args.__dict__)
3 changes: 3 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,9 @@ desispec API
.. automodule:: desispec.scripts.reformat_exptables
:members:

.. automodule:: desispec.scripts.reformat_proctables
:members:

.. automodule:: desispec.scripts.rejectcosmics
:members:

Expand Down
30 changes: 27 additions & 3 deletions py/desispec/scripts/reformat_exptables.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""
desispec.scripts.updateexptables
================================
desispec.scripts.reformat_exptables
===================================

"""
import argparse
import os
import sys
import numpy as np
Expand All @@ -20,8 +21,31 @@
from desispec.scripts.exposuretable import create_exposure_tables


def get_parser():
"""
Creates an arguments parser for the desi_reformat_exposure_tables script
"""
parser = argparse.ArgumentParser(usage = "{prog} [options]")
parser.add_argument("-n", "--nights", type=str, default=None, help="nights as comma separated string")
parser.add_argument("--night-range", type=str, default=None, help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD"+\
"for first_night,last_night specifying the beginning"+\
"and end of a range of nights to be generated. "+\
"last_night should be inclusive.")
parser.add_argument("--obstypes", type=str, default=None, help="comma separated list of exposure types to include in "+\
"the exposure table, e.g. science,arc,flat,dark,zero, ...")
parser.add_argument("-i", "--path-to-data", type=str, default=None, help="path to the raw input data")
parser.add_argument("-o","--exp-table-path", type=str, default=None, help="path to save exposure tables, without monthly subdirectory")
parser.add_argument("--orig-filetype", type=str, default='csv', help="format type for original exposure tables")
parser.add_argument("--out-filetype", type=str, default='csv', help="format type for output exposure tables")
parser.add_argument("--verbose", action="store_true", help="print verbose output")
parser.add_argument("--dry-run", action="store_true",
help="Perform a dry run, printing the changes that would be made and the final output table "+
"but not overwriting the actual files on disk.")
parser.add_argument("--no-specprod", action="store_true", help="Create exposure table in repository location "+\
"rather than the SPECPROD location.")
return parser

def update_exposure_tables(nights=None, night_range=None, path_to_data=None,
def reformat_exposure_tables(nights=None, night_range=None, path_to_data=None,
exp_table_path=None, obstypes=None, orig_filetype='csv',
out_filetype='csv', verbose=False, no_specprod=False,
dry_run=False):
Expand Down
194 changes: 194 additions & 0 deletions py/desispec/scripts/reformat_proctables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
"""
desispec.scripts.reformat_proctables
====================================

"""
import argparse
import os
import glob
import sys
import numpy as np
import re
import time
from astropy.table import Table

from desispec.io.meta import findfile
from desispec.workflow.proctable import get_processing_table_column_defs
from desispec.workflow.utils import define_variable_from_environment, listpath, \
pathjoin
from desispec.workflow.tableio import write_table, load_table
from desispec.scripts.exposuretable import create_exposure_tables


def get_parser():
"""
Creates an arguments parser for the desi_reformat_processing_tables script
"""
parser = argparse.ArgumentParser(usage = "{prog} [options]")
parser.add_argument("-n", "--nights", type=str, default=None, help="nights as comma separated string")
parser.add_argument("--night-range", type=str, default=None, help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD"+\
"for first_night,last_night specifying the beginning"+\
"and end of a range of nights to be generated. "+\
"last_night should be inclusive.")
parser.add_argument("--orig-filetype", type=str, default='csv', help="format type for original exposure tables")
parser.add_argument("--out-filetype", type=str, default='csv', help="format type for output exposure tables")
parser.add_argument("--dry-run", action="store_true",
help="Perform a dry run, printing the changes that would be made and the final output table "+
"but not overwriting the actual files on disk.")
return parser

def reformat_processing_tables(nights=None, night_range=None, orig_filetype='csv',
out_filetype='csv', dry_run=False):
"""
Generates updated processing tables for the nights requested. Requires
a current processing table to exist on disk.

Args:
nights: str, int, or comma separated list. The night(s) to generate
processing tables for.
night_range: str. comma separated pair of nights in form
YYYYMMDD,YYYYMMDD for first_night,last_night
specifying the beginning and end of a range of
nights to be generated. first_night and last_night are
inclusive.
orig_filetype: str. The file extension (without the '.') of the processing
tables.
out_filetype: str. The file extension for the outputted processing tables
(without the '.').
sbailey marked this conversation as resolved.
Show resolved Hide resolved

Returns:
Nothing
"""
# log = get_logger()
## Make sure user specified what nights to run on
if nights is None and night_range is None:
raise ValueError("Must specify either nights or night_range."
+" To process all nights give nights=all")

## Get all nights in 2020's with data
proctab_template = findfile('proctable', night=99999999)
proctab_template = proctab_template.replace('99999999', '202[0-9][01][0-9][0-3][0-9]')
proctab_template = proctab_template.replace('.csv', f'.{orig_filetype}')
nights_with_proctables = list()
for ptabfn in glob.glob(proctab_template):
## nights are 202YMMDD
matches = re.findall('202\d{5}', os.path.basename(ptabfn))
if len(matches) == 1:
n = int(matches[0])
nights_with_proctables.append(n)
else:
print(f"Couldn't parse a night from proctable file: {ptabfn}")

## If unpecified or given "all", set nights to all nights with data
check_night = False
if nights is None or nights == 'all':
nights = nights_with_proctables
## No need to check nights since derived from disk
else:
nights = [int(val.strip()) for val in nights.split(",")]
## If nights are specified, make sure we check that there is actually data
check_night = True
nights = np.sort(nights)

## If user specified a night range, cut nights to that range of dates
if night_range is not None:
if ',' not in night_range:
raise ValueError("night_range must be a comma separated pair of "
+ "nights in form YYYYMMDD,YYYYMMDD")
nightpair = night_range.split(',')
if len(nightpair) != 2 or not nightpair[0].isnumeric() \
or not nightpair[1].isnumeric():
raise ValueError("night_range must be a comma separated pair of "
+ "nights in form YYYYMMDD,YYYYMMDD")
first_night, last_night = nightpair
nights = nights[np.where(int(first_night) <= nights.astype(int))[0]]
nights = nights[np.where(int(last_night) >= nights.astype(int))[0]]

## Get current set of expected columns
ptab_cols, ptab_dtypes, ptab_defs = get_processing_table_column_defs(return_default_values=True)
ptab_cols, ptab_dtypes = np.array(ptab_cols), np.array(ptab_dtypes)

## Tell user the final list of nights and starting looping over them
print("Nights: ", nights)
for night in nights:
if check_night and night not in nights_with_proctables:
print(f"Night {night} doesn't have a processing table: Skipping.")
continue

## If the processing table doesn't exist, skip, since we are updating
## not generating.
orig_pathname = findfile('proctable', night=night).replace('.csv', f'.{orig_filetype}')
if not os.path.exists(orig_pathname):
print(f'Could not find processing table for night={night} at:'
+ f' {orig_pathname}. Skipping this night.')
continue

## Load the old and new tables to compare
origtable = load_table(orig_pathname, tabletype='proctab')
curr_colnames = np.array(list(origtable.colnames))
expected_cols = np.isin(curr_colnames, ptab_cols)
found_cols = np.isin(ptab_cols, curr_colnames)

## If everything is present, don't try to do anything
if np.all(expected_cols) and np.all(found_cols):
print(f"{orig_pathname} has all of the expected columns, not updating this table.")
continue

unexpected = list(curr_colnames[~expected_cols])
missing = list(ptab_cols[~found_cols])
print(f"Found the following unexpected columns: {unexpected}")
print(f"Found the following missing columns: {missing}")

## Solving the only cases I'm currently aware of
if 'CAMWORD' in unexpected and 'PROCCAMWORD' in missing:
print(f"CAMWORD listed instead of PROCCAMWORD. Updating that.")
origtable.rename_column('CAMWORD', 'PROCCAMWORD')
unexpected.remove('CAWORD')
missing.remove('PROCCAMWORD')

if len(unexpected) > 0:
print(f"WARNING: Script detected unexpected columns. Only handle "
+ f"the case where 'CAMWORD' is defined instead of PROCCAMWORD. "
+ f"The following unexpected columns will be dropped without "
+ f"using the information they contain: {unexpected}.")
for colname in unexpected:
origtable.remove_column(colname)

## Add any missing columns
for colname in missing:
if colname not in ['BADAMPS', 'LASTSTEP', 'EXPFLAG']:
print(f"WARNING: Script didn't expect {colname} to be missing. "
+ f"Replacing with default values, but this may have "
+ f"downstream consequences.")
colindex = np.where(ptab_cols==colname)[0][0]
newdat = [ptab_defs[colindex]] * len(origtable)
newcol = Table.Column(name=colname, data=newdat, dtype=ptab_dtypes[colindex])
origtable.add_column(newcol)

## Finally, reorder to the current column ordering
origtable = origtable[list(ptab_cols)]

## If just testing, print the table and a cell-by-cell equality test
## for the scalar columns
## If not testing, move the original table to an archived filename
## and save the updated table to the official exptable pathname
if dry_run:
print("\n\nOutput file would have been:")
origtable.pprint_all()
else:
ftime = time.strftime("%Y%m%d_%Hh%Mm")
replaced_pathname = orig_pathname.replace(f".{orig_filetype}",
f".replaced-{ftime}.{orig_filetype}")
print(f"Moving original file from {orig_pathname} to {replaced_pathname}")
os.rename(orig_pathname,replaced_pathname)
time.sleep(0.1)
out_pathname = orig_pathname.replace(f".{orig_filetype}", f".{out_filetype}")
write_table(origtable, out_pathname)
print(f"Updated file saved to {out_pathname}. Original archived as {replaced_pathname}")

print("\n\n")

## Flush the outputs
sys.stdout.flush()
sys.stderr.flush()
print("Processing table regenerations complete")
2 changes: 1 addition & 1 deletion py/desispec/workflow/proctable.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_processing_table_column_defs(return_default_values=False,
coldeflt2 = [ 'a0123456789' , 0 , -99 , '' , 'unknown', defqid ]

colnames2 += [ 'SUBMIT_DATE', 'STATUS' , 'SCRIPTNAME']
coltypes2 += [ int , 'S14' , 'S40' ]
coltypes2 += [ int , 'S14' , 'S50' ]
coldeflt2 += [ -99 , 'UNSUBMITTED', '' ]

colnames2 += ['INT_DEP_IDS' , 'LATEST_DEP_QID' , 'ALL_QIDS' ]
Expand Down
Loading