From d60e7e8c6d8180b786d47941ef041d6ee2f0ddea Mon Sep 17 00:00:00 2001 From: Bryan Hilbert Date: Wed, 10 Jul 2024 12:31:15 -0400 Subject: [PATCH] filename parser no longer raises an exception for unrecognized files --- jwql/jwql_monitors/generate_preview_images.py | 31 ++++++++++++++----- jwql/jwql_monitors/monitor_filesystem.py | 5 +-- jwql/tests/test_utils.py | 7 +++-- jwql/utils/monitor_template.py | 13 +++++--- jwql/utils/monitor_utils.py | 1 - jwql/utils/organize_filesystem.py | 8 +++-- jwql/utils/utils.py | 9 +++--- .../apps/jwql/archive_database_update.py | 9 ++++-- jwql/website/apps/jwql/data_containers.py | 18 ++++++----- jwql/website/apps/jwql/forms.py | 23 +++++++++----- 10 files changed, 81 insertions(+), 43 deletions(-) diff --git a/jwql/jwql_monitors/generate_preview_images.py b/jwql/jwql_monitors/generate_preview_images.py index 544683457..32efe253e 100755 --- a/jwql/jwql_monitors/generate_preview_images.py +++ b/jwql/jwql_monitors/generate_preview_images.py @@ -212,6 +212,11 @@ def check_existence(file_list, outdir): # for the appropriately named jpg of the mosaic, which depends # on the specific detectors in the file_list file_parts = filename_parser(file_list[0]) + + # If filename_parser() does not recognize the filename, return False + if 'program_id' not in file_parts: + return False + if file_parts['detector'].upper() in NIRCAM_SHORTWAVE_DETECTORS: mosaic_str = "NRC_SW*_MOSAIC_" elif file_parts['detector'].upper() in NIRCAM_LONGWAVE_DETECTORS: @@ -253,7 +258,11 @@ def create_dummy_filename(filelist): modules = [] for filename in filelist: indir, infile = os.path.split(filename) - det_string = filename_parser(infile)['detector'] + try: + det_string = filename_parser(infile)['detector'] + except KeyError: + # If filename_parser() does not recognize the file, skip it + continue det_string_list.append(det_string) modules.append(det_string[3].upper()) @@ -307,7 +316,11 @@ def create_mosaic(filenames): else: diff_im = image.data data.append(diff_im) - detector.append(filename_parser(filename)['detector'].upper()) + try: + detector.append(filename_parser(filename)['detector'].upper()) + except KeyError: + # If filename_parser() does not recognize the file, skip it. + pass data_lower_left.append((image.xstart, image.ystart)) # Make sure SW and LW data are not being mixed. Create the @@ -623,9 +636,8 @@ def group_filenames(filenames): subgroup = [] # Generate string to be matched with other filenames - try: - filename_dict = filename_parser(os.path.basename(filename)) - except ValueError: + filename_dict = filename_parser(os.path.basename(filename)) + if 'detector' not in filename_dict: logging.warning('Could not parse filename for {}'.format(filename)) break @@ -704,7 +716,12 @@ def process_program(program, overwrite): filenames = [filename for filename in filenames if os.path.splitext(filename.split('_')[-1])[0] not in IGNORED_SUFFIXES] # Remove guiding files, as these are not currently visible in JWQL anyway - filenames = [filename for filename in filenames if 'guider_mode' not in filename_parser(filename)] + filtered_filenames = [] + for filename in filenames: + parsed = filename_parser(filename) + if 'guider_mode' not in parsed and 'detector' in parsed: + filtered_filenames.append(filename) + filenames = filtered_filenames logging.info('Found {} filenames'.format(len(filenames))) logging.info('') @@ -720,7 +737,7 @@ def process_program(program, overwrite): # Determine the save location try: identifier = 'jw{}'.format(filename_parser(filename)['program_id']) - except ValueError: + except KeyError: identifier = os.path.basename(filename).split('.fits')[0] preview_output_directory = os.path.join(SETTINGS['preview_image_filesystem'], identifier) thumbnail_output_directory = os.path.join(SETTINGS['thumbnail_filesystem'], identifier) diff --git a/jwql/jwql_monitors/monitor_filesystem.py b/jwql/jwql_monitors/monitor_filesystem.py index 2c1f4d379..fe9124485 100755 --- a/jwql/jwql_monitors/monitor_filesystem.py +++ b/jwql/jwql_monitors/monitor_filesystem.py @@ -137,9 +137,10 @@ def gather_statistics(general_results_dict, instrument_results_dict): if filename.endswith(".fits"): # Parse out filename information + filename_dict = filename_parser(filename) try: - filename_dict = filename_parser(filename) - except ValueError: + filename_type = filename_dict['filename_type'] + except KeyError: break # For MSA files, which do not have traditional suffixes, set the diff --git a/jwql/tests/test_utils.py b/jwql/tests/test_utils.py index 31bfc802c..4e054fbcb 100644 --- a/jwql/tests/test_utils.py +++ b/jwql/tests/test_utils.py @@ -435,9 +435,10 @@ def test_filename_parser_non_jwst(): that is not formatted in the JWST naming convention. Ensure the appropriate error is raised. """ - with pytest.raises(ValueError): - filename = 'not_a_jwst_file.fits' - filename_parser(filename) + filename = 'not_a_jwst_file.fits' + filename_dict = filename_parser(filename) + assert 'recognized_filename' in filename_dict + assert filename_dict['recognized_filename'] is False @pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to central storage.') diff --git a/jwql/utils/monitor_template.py b/jwql/utils/monitor_template.py index 9e36ccc0f..598ace9e5 100644 --- a/jwql/utils/monitor_template.py +++ b/jwql/utils/monitor_template.py @@ -106,11 +106,14 @@ def monitor_template_main(): # Example of locating a dataset in the filesystem filesystem = SETTINGS['filesystem'] - dataset = os.path.join(filesystem, - 'public', - 'jw{}'.format(filename_dict['program_id']), - 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit']), - filename_of_interest) + try: + dataset = os.path.join(filesystem, + 'public', + 'jw{}'.format(filename_dict['program_id']), + 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit']), + filename_of_interest) + except KeyError: + raise KeyError(f'Filename {filename_of_interest} not recognized by filename_parser()') # Example of reading in dataset using jwst.datamodels im = datamodels.open(dataset) diff --git a/jwql/utils/monitor_utils.py b/jwql/utils/monitor_utils.py index 536ac1ad4..8eb4c8a16 100644 --- a/jwql/utils/monitor_utils.py +++ b/jwql/utils/monitor_utils.py @@ -27,7 +27,6 @@ from jwql.utils.constants import ON_GITHUB_ACTIONS, ON_READTHEDOCS from jwql.utils.logging_functions import configure_logging, get_log_status from jwql.utils import mast_utils -from jwql.utils.utils import filename_parser # Increase the limit on the number of entries that can be returned by diff --git a/jwql/utils/organize_filesystem.py b/jwql/utils/organize_filesystem.py index b4dcf483e..415f5920b 100644 --- a/jwql/utils/organize_filesystem.py +++ b/jwql/utils/organize_filesystem.py @@ -57,7 +57,9 @@ def organize_filesystem(): SETTINGS['filesystem'], 'jw{}'.format(filename_dict['program_id']), 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit'])) - except KeyError: # Some filenames do not have a program_id/observation/visit structure + except KeyError: + # Some filenames do not have a program_id/observation/visit structure + # Files that are not recognized by filename_parser will also end up here. break # Build complete destination location @@ -96,7 +98,9 @@ def revert_filesystem(): destination_directory = os.path.join( SETTINGS['old_filesystem'], 'jw{}'.format(filename_dict['program_id'])) - except KeyError: # Some filenames do not have a program_id/observation/visit structure + except KeyError: + # Some filenames do not have a program_id/observation/visit structure + # Filenames not recognized by filename_parser() will also end up here. break # Build complete destination location diff --git a/jwql/utils/utils.py b/jwql/utils/utils.py index 9113169f5..7f35f3e70 100644 --- a/jwql/utils/utils.py +++ b/jwql/utils/utils.py @@ -31,6 +31,7 @@ import glob import itertools import json +import logging import pyvo as vo import os import re @@ -596,11 +597,9 @@ def filename_parser(filename): # Raise error if unable to parse the filename except AttributeError: - jdox_url = 'https://jwst-docs.stsci.edu/understanding-jwst-data-files/jwst-data-file-naming-conventions' - raise ValueError( - 'Provided file {} does not follow JWST naming conventions. ' - 'See {} for further information.'.format(filename, jdox_url) - ) + filename_dict = {'recognized_filename': False} + logging.warning((f'\nFile; {filename} was not recognized by filename_parser(). Update parser or ' + 'constants.py if it should be recognized.\n')) return filename_dict diff --git a/jwql/website/apps/jwql/archive_database_update.py b/jwql/website/apps/jwql/archive_database_update.py index bb7ffe481..10a2c2eef 100755 --- a/jwql/website/apps/jwql/archive_database_update.py +++ b/jwql/website/apps/jwql/archive_database_update.py @@ -117,9 +117,12 @@ def get_updates(update_database): for rootname in all_rootnames: filename_dict = filename_parser(rootname) - # Weed out file types that are not supported by generate_preview_images - if 'stage_3' not in filename_dict['filename_type']: - rootnames.append(rootname) + try: + # Weed out file types that are not supported by generate_preview_images + if 'stage_3' not in filename_dict['filename_type']: + rootnames.append(rootname) + except KeyError: + pass if len(filenames) > 0: diff --git a/jwql/website/apps/jwql/data_containers.py b/jwql/website/apps/jwql/data_containers.py index 6b9ac99f6..99c49f22a 100644 --- a/jwql/website/apps/jwql/data_containers.py +++ b/jwql/website/apps/jwql/data_containers.py @@ -1326,7 +1326,11 @@ def get_image_info(file_root): parsed_fn = filename_parser(filename) # Get suffix information - suffix = parsed_fn['suffix'] + try: + suffix = parsed_fn['suffix'] + except KeyError: + # If the filename parser does not recognize the file, skip it + continue # For crf or crfints suffixes, we need to also include the association value # in the suffix, so that preview images can be found later. @@ -2204,18 +2208,18 @@ def thumbnails_query_ajax(rootnames): # Parse filename try: filename_dict = filename_parser(rootname) - except ValueError: - continue - # Add to list of all exposure groups - exp_groups.add(filename_dict['group_root']) + # Add to list of all exposure groups + exp_groups.add(filename_dict['group_root']) + except KeyError: + continue # Get list of available filenames available_files = get_filenames_by_rootname(rootname) # Add data to dictionary data_dict['file_data'][rootname] = {} - data_dict['file_data'][rootname]['inst'] = JWST_INSTRUMENT_NAMES_MIXEDCASE[filename_parser(rootname)['instrument']] + data_dict['file_data'][rootname]['inst'] = JWST_INSTRUMENT_NAMES_MIXEDCASE[filename_dict['instrument']] data_dict['file_data'][rootname]['filename_dict'] = filename_dict data_dict['file_data'][rootname]['available_files'] = available_files root_file_info = RootFileInfo.objects.get(root_name=rootname) @@ -2228,7 +2232,7 @@ def thumbnails_query_ajax(rootnames): try: suffix = filename_parser(filename)['suffix'] data_dict['file_data'][rootname]['suffixes'].append(suffix) - except ValueError: + except KeyError: continue data_dict['file_data'][rootname]['thumbnail'] = get_thumbnail_by_rootname(rootname) diff --git a/jwql/website/apps/jwql/forms.py b/jwql/website/apps/jwql/forms.py index 90c43cafe..816cbfcba 100644 --- a/jwql/website/apps/jwql/forms.py +++ b/jwql/website/apps/jwql/forms.py @@ -330,10 +330,15 @@ def clean_search(self): if any(map(filename.__contains__, GUIDER_FILENAME_TYPE)): continue else: - instrument = filename_parser(file)['instrument'] - observation = filename_parser(file)['observation'] - all_instruments.append(instrument) - all_observations[instrument].append(observation) + fileinfo = filename_parser(file) + try: + instrument = fileinfo['instrument'] + observation = fileinfo['observation'] + all_instruments.append(instrument) + all_observations[instrument].append(observation) + except KeyError: + # If the filename is not recognized by filename_parser(), skip it. + continue # sort lists so first observation is available when link is clicked. for instrument in all_instruments: @@ -384,10 +389,12 @@ def _search_is_fileroot(self, search): """ try: - self.fileroot_dict = filename_parser(search) - return True - except ValueError: - return False + parsed = filename_parser(search) + if 'instrument' in parsed: + self.fileroot_dict = filename_parser(search) + return True + else: + return False def redirect_to_files(self): """Determine where to redirect the web app based on user input.