Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add script to update processing table column layout #2376

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions bin/desi_reformat_processing_tables
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python
# coding: utf-8

import argparse

## Import some helper functions, you can see their definitions by uncomenting the bash shell command
from desispec.scripts.reformat_proctables import update_processing_tables


def get_parser():
akremin marked this conversation as resolved.
Show resolved Hide resolved
"""
Creates an arguments parser for the desi_reformat_processing_tables script
"""
parser = argparse.ArgumentParser(usage = "{prog} [options]")
parser.add_argument("-n", "--nights", type=str, default=None, help="nights as comma separated string")
parser.add_argument("--night-range", type=str, default=None, help="comma separated pair of nights in form YYYYMMDD,YYYYMMDD"+\
"for first_night,last_night specifying the beginning"+\
"and end of a range of nights to be generated. "+\
"last_night should be inclusive.")
parser.add_argument("--orig-filetype", type=str, default='csv', help="format type for original exposure tables")
parser.add_argument("--out-filetype", type=str, default='csv', help="format type for output exposure tables")
parser.add_argument("--dry-run", action="store_true",
help="Perform a dry run, printing the changes that would be made and the final output table "+
"but not overwriting the actual files on disk.")
return parser



if __name__ == '__main__':
parser = get_parser()
args = parser.parse_args()

update_processing_tables(**args.__dict__)
3 changes: 3 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,9 @@ desispec API
.. automodule:: desispec.scripts.reformat_exptables
:members:

.. automodule:: desispec.scripts.reformat_proctables
:members:

.. automodule:: desispec.scripts.rejectcosmics
:members:

Expand Down
177 changes: 177 additions & 0 deletions py/desispec/scripts/reformat_proctables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""
desispec.scripts.updateexptables
akremin marked this conversation as resolved.
Show resolved Hide resolved
================================

"""
import os
import glob
import sys
import numpy as np
import re
import time
from astropy.table import Table

from desispec.io.meta import findfile
from desispec.workflow.proctable import get_processing_table_column_defs
from desispec.workflow.utils import define_variable_from_environment, listpath, \
pathjoin
from desispec.workflow.tableio import write_table, load_table
from desispec.scripts.exposuretable import create_exposure_tables



def update_processing_tables(nights=None, night_range=None, orig_filetype='csv',
out_filetype='csv', dry_run=False):
"""
Generates updated processing tables for the nights requested. Requires
a current processing table to exist on disk.

Args:
nights: str, int, or comma separated list. The night(s) to generate
procesing tables for.
night_range: str. comma separated pair of nights in form
YYYYMMDD,YYYYMMDD for first_night,last_night
specifying the beginning and end of a range of
nights to be generated. first_night and last_night are
inclusive.
orig_filetype: str. The file extension (without the '.') of the exposure
tables.
out_filetype: str. The file extension for the outputted exposure tables
(without the '.').
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that you've already implemented it, this may not matter, but what's the motivation for orig_filetype and out_filetype? Do we have any processing tables that aren't *.csv?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I adapted the parser from desi_reformat_exptables which is much older. In the past exposure_tables and processing_tables were less mature and the file type was specified (with a default of csv). At this point, they are fundamental enough that changing the filetype is unlikely, but I kept it since I didn't see harm in allowing extra functionality.


Returns:
Nothing
"""
# log = get_logger()
## Make sure user specified what nights to run on
if nights is None and night_range is None:
raise ValueError("Must specify either nights or night_range."
+" To process all nights give nights=all")

## Get all nights in 2020's with data
proctab_template = findfile('proctable', night=99999999)
proctab_template = proctab_template.replace('99999999', '202[0-9][01][0-9][0-3][0-9]')
proctab_template = proctab_template.replace('.csv', f'.{orig_filetype}')
nights_with_proctables = list()
for ptabfn in glob.glob(proctab_template):
## nights are 202YMMDD
matches = re.findall('202\d{5}', os.path.basename(ptabfn))
if len(matches) == 1:
n = int(matches[0])
nights_with_proctables.append(n)
else:
print(f"Couldn't parse a night from proctable file: {ptabfn}")

## If unpecified or given "all", set nights to all nights with data
check_night = False
if nights is None or nights == 'all':
nights = nights_with_proctables
## No need to check nights since derived from disk
else:
nights = [int(val.strip()) for val in nights.split(",")]
## If nights are specified, make sure we check that there is actually data
check_night = True
nights = np.sort(nights)

## If user specified a night range, cut nights to that range of dates
if night_range is not None:
if ',' not in night_range:
raise ValueError("night_range must be a comma separated pair of "
+ "nights in form YYYYMMDD,YYYYMMDD")
nightpair = night_range.split(',')
if len(nightpair) != 2 or not nightpair[0].isnumeric() \
or not nightpair[1].isnumeric():
raise ValueError("night_range must be a comma separated pair of "
+ "nights in form YYYYMMDD,YYYYMMDD")
first_night, last_night = nightpair
nights = nights[np.where(int(first_night) <= nights.astype(int))[0]]
nights = nights[np.where(int(last_night) >= nights.astype(int))[0]]

## Get current set of expected columns
ptab_cols, ptab_dtypes, ptab_defs = get_processing_table_column_defs(return_default_values=True)
ptab_cols, ptab_dtypes = np.array(ptab_cols), np.array(ptab_dtypes)

## Tell user the final list of nights and starting looping over them
print("Nights: ", nights)
for night in nights:
if check_night and night not in nights_with_proctables:
print(f"Night {night} doesn't have a processing table: Skipping.")
continue

## If the processing table doesn't exist, skip, since we are updating
## not generating.
orig_pathname = findfile('proctable', night=night).replace('.csv', f'.{orig_filetype}')
if not os.path.exists(orig_pathname):
print(f'Could not find processing table for night={night} at:'
+ f' {orig_pathname}. Skipping this night.')
continue

## Load the old and new tables to compare
origtable = load_table(orig_pathname, tabletype='proctab')
curr_colnames = np.array(list(origtable.colnames))
expected_cols = np.isin(curr_colnames, ptab_cols)
found_cols = np.isin(ptab_cols, curr_colnames)

## If everything is present, don't try to do anything
if np.all(expected_cols) and np.all(found_cols):
print(f"{orig_pathname} has all of the expected columns, not updating this table.")
continue

unexpected = list(curr_colnames[~expected_cols])
missing = list(ptab_cols[~found_cols])
print(f"Found the following unexpected columns: {unexpected}")
print(f"Found the following missing columns: {missing}")

## Solving the only cases I'm currently aware of
if 'CAMWORD' in unexpected and 'PROCCAMWORD' in missing:
print(f"CAMWORD listed instead of PROCCAMWORD. Updating that.")
origtable.rename_column('CAMWORD', 'PROCCAMWORD')
unexpected.remove('CAWORD')
missing.remove('PROCCAMWORD')

if len(unexpected) > 0:
print(f"WARNING: Script detected unexpected columns. Only handle "
+ f"the case where 'CAMWORD' is defined instead of PROCCAMWORD. "
+ f"The following unexpected columns will be dropped without "
+ f"using the information they contain: {unexpected}.")
for colname in unexpected:
origtable.remove_column(colname)

## Add any missing columns
for colname in missing:
if colname not in ['BADAMPS', 'LASTSTEP', 'EXPFLAG']:
print(f"WARNING: Script didn't expect {colname} to be missing. "
+ f"Replacing with default values, but this may have "
+ f"downstream consequences.")
colindex = np.where(ptab_cols==colname)[0][0]
newdat = [ptab_defs[colindex]] * len(origtable)
newcol = Table.Column(name=colname, data=newdat, dtype=ptab_dtypes[colindex])
origtable.add_column(newcol)

## Finally, reorder to the current column ordering
origtable = origtable[list(ptab_cols)]

## If just testing, print the table and a cell-by-cell equality test
## for the scalar columns
## If not testing, move the original table to an archived filename
## and save the updated table to the official exptable pathname
if dry_run:
print("\n\nOutput file would have been:")
origtable.pprint_all()
else:
ftime = time.strftime("%Y%m%d_%Hh%Mm")
replaced_pathname = orig_pathname.replace(f".{orig_filetype}",
f".replaced-{ftime}.{orig_filetype}")
print(f"Moving original file from {orig_pathname} to {replaced_pathname}")
os.rename(orig_pathname,replaced_pathname)
time.sleep(0.1)
out_pathname = orig_pathname.replace(f".{orig_filetype}", f".{out_filetype}")
write_table(origtable, out_pathname)
print(f"Updated file saved to {out_pathname}. Original archived as {replaced_pathname}")

print("\n\n")

## Flush the outputs
sys.stdout.flush()
sys.stderr.flush()
print("Exposure table regenerations complete")
akremin marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion py/desispec/workflow/proctable.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_processing_table_column_defs(return_default_values=False,
coldeflt2 = [ 'a0123456789' , 0 , -99 , '' , 'unknown', defqid ]

colnames2 += [ 'SUBMIT_DATE', 'STATUS' , 'SCRIPTNAME']
coltypes2 += [ int , 'S14' , 'S40' ]
coltypes2 += [ int , 'S14' , 'S50' ]
coldeflt2 += [ -99 , 'UNSUBMITTED', '' ]

colnames2 += ['INT_DEP_IDS' , 'LATEST_DEP_QID' , 'ALL_QIDS' ]
Expand Down
Loading