diff --git a/jwql/jwql_monitors/generate_preview_images.py b/jwql/jwql_monitors/generate_preview_images.py index dcdca3233..cb897368e 100755 --- a/jwql/jwql_monitors/generate_preview_images.py +++ b/jwql/jwql_monitors/generate_preview_images.py @@ -212,6 +212,13 @@ def check_existence(file_list, outdir): # for the appropriately named jpg of the mosaic, which depends # on the specific detectors in the file_list file_parts = filename_parser(file_list[0]) + + # If filename_parser() does not recognize the filename, return False + if not file_parts['recognized_filename']: + logging.warning((f'While running checking_existence() for a preview image for {file_list[0]}, ' + 'filename_parser() failed to recognize the file pattern.')) + return False + if file_parts['detector'].upper() in NIRCAM_SHORTWAVE_DETECTORS: mosaic_str = "NRC_SW*_MOSAIC_" elif file_parts['detector'].upper() in NIRCAM_LONGWAVE_DETECTORS: @@ -253,7 +260,14 @@ def create_dummy_filename(filelist): modules = [] for filename in filelist: indir, infile = os.path.split(filename) - det_string = filename_parser(infile)['detector'] + parsed_filename = filename_parser(infile) + if parsed_filename['recognized_filename']: + det_string = parsed_filename['detector'] + else: + # If filename_parser() does not recognize the file, skip it + logging.warning((f'While using {infile} to create a dummy filename in create_dummy_filename(), the ' + 'filename parser failed.')) + continue det_string_list.append(det_string) modules.append(det_string[3].upper()) @@ -307,7 +321,14 @@ def create_mosaic(filenames): else: diff_im = image.data data.append(diff_im) - detector.append(filename_parser(filename)['detector'].upper()) + file_info = filename_parser(filename) + if file_info['recognized_filename']: + detector.append(file_info['detector'].upper()) + else: + # If filename_parser() does not recognize the file, skip it. + logging.warning((f'While running create_mosaic() using {file_list[0]}, ' + 'filename_parser() failed to recognize the file pattern.')) + pass data_lower_left.append((image.xstart, image.ystart)) # Make sure SW and LW data are not being mixed. Create the @@ -650,10 +671,10 @@ def group_filenames(filenames): subgroup = [] # Generate string to be matched with other filenames - try: - filename_dict = filename_parser(os.path.basename(filename)) - except ValueError: - logging.warning('Could not parse filename for {}'.format(filename)) + filename_dict = filename_parser(os.path.basename(filename)) + if not filename_dict['recognized_filename']: + logging.warning((f'While running generate_preview_images.group_filenames() on {filename}, the ' + 'filename_parser() failed to recognize the file pattern.')) break # If the filename was already involved in a match, then skip @@ -731,7 +752,16 @@ def process_program(program, overwrite): filenames = [filename for filename in filenames if os.path.splitext(filename.split('_')[-1])[0] not in IGNORED_SUFFIXES] # Remove guiding files, as these are not currently visible in JWQL anyway - filenames = [filename for filename in filenames if 'guider_mode' not in filename_parser(filename)] + filtered_filenames = [] + for filename in filenames: + parsed = filename_parser(filename) + if parsed['recognized_filename']: + if 'guider_mode' not in parsed and 'detector' in parsed: + filtered_filenames.append(filename) + else: + logging.warning((f'While running generate_preview_images.process_program() on {filename}, the ' + 'filename_parser() failed to recognize the file pattern.')) + filenames = filtered_filenames logging.info('Found {} filenames'.format(len(filenames))) logging.info('') @@ -745,10 +775,14 @@ def process_program(program, overwrite): logging.debug(f'Working on {filename}') # Determine the save location - try: - identifier = 'jw{}'.format(filename_parser(filename)['program_id']) - except ValueError: + parsed = filename_parser(filename) + if parsed['recognized_filename']: + identifier = 'jw{}'.format(parsed['program_id']) + else: + # In this case, the filename_parser failed to recognize the filename identifier = os.path.basename(filename).split('.fits')[0] + logging.warning((f'While running generate_preview_images.process_program() on filtered filename {filename}, the ' + 'filename_parser() failed to recognize the file pattern.')) preview_output_directory = os.path.join(SETTINGS['preview_image_filesystem'], identifier) thumbnail_output_directory = os.path.join(SETTINGS['thumbnail_filesystem'], identifier) diff --git a/jwql/jwql_monitors/monitor_filesystem.py b/jwql/jwql_monitors/monitor_filesystem.py index 2c1f4d379..27364f888 100755 --- a/jwql/jwql_monitors/monitor_filesystem.py +++ b/jwql/jwql_monitors/monitor_filesystem.py @@ -75,6 +75,7 @@ THUMBNAILS = SETTINGS['thumbnail_filesystem'] LOGS = SETTINGS['log_dir'] + def files_per_filter(): """Querying MAST (rather than looping through the filesystem), determine how many files use each filter for each instrument. Note that thiw function takes @@ -94,7 +95,7 @@ def files_per_filter(): for fname in FILTERS_PER_INSTRUMENT[instrument]: # note that this does not include pupil wheel-based filters obs = Observations.query_criteria(filters=fname, instrument_name=JWST_INSTRUMENT_NAMES_MIXEDCASE[instrument]) batch_size = 5 - batches = [obs[i:i+batch_size] for i in range(0, len(obs), batch_size)] + batches = [obs[i:i + batch_size] for i in range(0, len(obs), batch_size)] obs_table = [Observations.get_product_list(batch) for batch in batches] products = unique(vstack(obs_table), keys='productFilename') @@ -137,9 +138,12 @@ def gather_statistics(general_results_dict, instrument_results_dict): if filename.endswith(".fits"): # Parse out filename information - try: - filename_dict = filename_parser(filename) - except ValueError: + filename_dict = filename_parser(filename) + if filename_dict['recognized_filename']: + filename_type = filename_dict['filename_type'] + else: + logging.warning((f'While running gather_statistics() on the filesystem {filename}, ' + 'caused filename_parser() to fail.')) break # For MSA files, which do not have traditional suffixes, set the diff --git a/jwql/tests/test_utils.py b/jwql/tests/test_utils.py index 31bfc802c..121a0dbec 100644 --- a/jwql/tests/test_utils.py +++ b/jwql/tests/test_utils.py @@ -42,6 +42,7 @@ 'observation': '001', 'parallel_seq_id': '1', 'program_id': '90002', + 'recognized_filename': True, 'suffix': 'rateints', 'visit': '001', 'visit_group': '02', @@ -58,6 +59,7 @@ 'observation': '001', 'parallel_seq_id': '1', 'program_id': '00327', + 'recognized_filename': True, 'suffix': 'rate', 'visit': '001', 'visit_group': '02', @@ -74,6 +76,7 @@ 'observation': '001', 'parallel_seq_id': '1', 'program_id': '00327', + 'recognized_filename': True, 'visit': '001', 'visit_group': '02', 'file_root': 'jw00327001001_02101_00002_nrca1', @@ -85,6 +88,7 @@ 'instrument': 'nirspec', 'observation': '008', 'program_id': '01118', + 'recognized_filename': True, 'visit': '001', 'detector': 'Unknown', 'file_root': 'jw01118008001_01_msa', @@ -101,6 +105,7 @@ 'observation': '002', 'parallel_seq_id': '1', 'program_id': '94015', + 'recognized_filename': True, 'suffix': 'crf', 'visit': '002', 'visit_group': '02', @@ -118,6 +123,7 @@ 'observation': '001', 'parallel_seq_id': '1', 'program_id': '90001', + 'recognized_filename': True, 'visit': '003', 'visit_group': '02', 'file_root': 'jw90001001003_02101_00001_nis', @@ -130,6 +136,7 @@ 'instrument': 'miri', 'optical_elements': 'f1130w', 'program_id': '80600', + 'recognized_filename': True, 'suffix': 'i2d', 'target_id': 't001', 'detector': 'Unknown', @@ -143,6 +150,7 @@ 'instrument': 'miri', 'optical_elements': 'f1130w', 'program_id': '80600', + 'recognized_filename': True, 'suffix': 'i2d', 'target_id': 't001', 'detector': 'Unknown', @@ -156,6 +164,7 @@ 'instrument': 'miri', 'optical_elements': 'f1130w', 'program_id': '80600', + 'recognized_filename': True, 'source_id': 's00001', 'suffix': 'i2d', 'detector': 'Unknown', @@ -170,6 +179,7 @@ 'epoch': '1', 'optical_elements': 'f1130w', 'program_id': '80600', + 'recognized_filename': True, 'suffix': 'i2d', 'target_id': 't001', 'detector': 'Unknown', @@ -184,6 +194,7 @@ 'epoch': '1', 'optical_elements': 'f1130w', 'program_id': '80600', + 'recognized_filename': True, 'source_id': 's00001', 'suffix': 'i2d', 'detector': 'Unknown', @@ -197,6 +208,7 @@ 'instrument': 'miri', 'optical_elements': 'f1130w', 'program_id': '80600', + 'recognized_filename': True, 'target_id': 't001', 'detector': 'Unknown', 'file_root': 'jw80600-o009_t001_miri_f1130w', @@ -209,6 +221,7 @@ 'instrument': 'miri', 'optical_elements': 'f1130w', 'program_id': '80600', + 'recognized_filename': True, 'source_id': 's00001', 'detector': 'Unknown', 'file_root': 'jw80600-o009_s00001_miri_f1130w', @@ -224,6 +237,7 @@ 'observation': '003', 'parallel_seq_id': '1', 'program_id': '00733', + 'recognized_filename': True, 'segment': '001', 'suffix': 'rate', 'visit': '001', @@ -242,6 +256,7 @@ 'observation': '003', 'parallel_seq_id': '1', 'program_id': '00733', + 'recognized_filename': True, 'segment': '001', 'suffix': 'crfints', 'visit': '001', @@ -259,6 +274,7 @@ 'observation': '003', 'parallel_seq_id': '1', 'program_id': '00733', + 'recognized_filename': True, 'segment': '001', 'visit': '001', 'visit_group': '02', @@ -274,6 +290,7 @@ 'instrument': 'fgs', 'observation': '011', 'program_id': '00729', + 'recognized_filename': True, 'suffix': 'image_cal', 'visit': '001', 'detector': 'Unknown', @@ -289,6 +306,7 @@ 'instrument': 'fgs', 'observation': '011', 'program_id': '00729', + 'recognized_filename': True, 'suffix': 'image_cal', 'visit': '001', 'detector': 'Unknown', @@ -304,6 +322,7 @@ 'instrument': 'fgs', 'observation': '001', 'program_id': '00327', + 'recognized_filename': True, 'visit': '001', 'detector': 'Unknown', 'file_root': 'jw00327001001_gs-id_2', @@ -318,6 +337,7 @@ 'instrument': 'fgs', 'observation': '001', 'program_id': '00327', + 'recognized_filename': True, 'visit': '001', 'detector': 'Unknown', 'file_root': 'jw00327001001_gs-id_12', @@ -332,6 +352,7 @@ 'instrument': 'fgs', 'observation': '048', 'program_id': '86600', + 'recognized_filename': True, 'suffix': 'stream', 'visit': '001', 'detector': 'Unknown', @@ -347,6 +368,7 @@ 'instrument': 'fgs', 'observation': '011', 'program_id': '00729', + 'recognized_filename': True, 'visit': '001', 'detector': 'Unknown', 'file_root': 'jw00729011001_gs-acq2_2019155024808', @@ -361,6 +383,7 @@ 'instrument': 'fgs', 'observation': '005', 'program_id': '01118', + 'recognized_filename': True, 'segment': '002', 'suffix': 'uncal', 'visit': '001', @@ -371,6 +394,7 @@ # Test msa file ('jw02560013001_01_msa.fits', {'program_id': '02560', + 'recognized_filename': True, 'observation': '013', 'visit': '001', 'filename_type': 'stage_2_msa', @@ -435,9 +459,10 @@ def test_filename_parser_non_jwst(): that is not formatted in the JWST naming convention. Ensure the appropriate error is raised. """ - with pytest.raises(ValueError): - filename = 'not_a_jwst_file.fits' - filename_parser(filename) + filename = 'not_a_jwst_file.fits' + filename_dict = filename_parser(filename) + assert 'recognized_filename' in filename_dict + assert filename_dict['recognized_filename'] is False @pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to central storage.') @@ -479,7 +504,7 @@ def test_validate_config(): "admin_account": "", "auth_mast": "", "connection_string": "", - "database": { + "databases": { "engine": "", "name": "", "user": "", @@ -487,12 +512,32 @@ def test_validate_config(): "host": "", "port": "" }, + "django_databases": { + "default": { + "ENGINE": "", + "NAME": "", + "USER": "", + "PASSWORD": "", + "HOST": "", + "PORT": "" + }, + "monitors": { + "ENGINE": "", + "NAME": "", + "USER": "", + "PASSWORD": "", + "HOST": "", + "PORT": "" + } + }, + "django_debug": "", "jwql_dir": "", "jwql_version": "", "server_type": "", "log_dir": "", "mast_token": "", "outputs": "", + "working": "", "preview_image_filesystem": "", "filesystem": "", "setup_file": "", diff --git a/jwql/utils/monitor_template.py b/jwql/utils/monitor_template.py index 9e36ccc0f..b2567a35b 100644 --- a/jwql/utils/monitor_template.py +++ b/jwql/utils/monitor_template.py @@ -106,11 +106,14 @@ def monitor_template_main(): # Example of locating a dataset in the filesystem filesystem = SETTINGS['filesystem'] - dataset = os.path.join(filesystem, - 'public', - 'jw{}'.format(filename_dict['program_id']), - 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit']), - filename_of_interest) + if filename_dict['recognized_filename']: + dataset = os.path.join(filesystem, + 'public', + 'jw{}'.format(filename_dict['program_id']), + 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit']), + filename_of_interest) + else: + raise KeyError(f'Filename {filename_of_interest} not recognized by filename_parser() in monitor_template_main') # Example of reading in dataset using jwst.datamodels im = datamodels.open(dataset) diff --git a/jwql/utils/monitor_utils.py b/jwql/utils/monitor_utils.py index 536ac1ad4..8eb4c8a16 100644 --- a/jwql/utils/monitor_utils.py +++ b/jwql/utils/monitor_utils.py @@ -27,7 +27,6 @@ from jwql.utils.constants import ON_GITHUB_ACTIONS, ON_READTHEDOCS from jwql.utils.logging_functions import configure_logging, get_log_status from jwql.utils import mast_utils -from jwql.utils.utils import filename_parser # Increase the limit on the number of entries that can be returned by diff --git a/jwql/utils/organize_filesystem.py b/jwql/utils/organize_filesystem.py index b4dcf483e..ee1e2ce4a 100644 --- a/jwql/utils/organize_filesystem.py +++ b/jwql/utils/organize_filesystem.py @@ -52,12 +52,16 @@ def organize_filesystem(): filename_dict = filename_parser(src) # Build destination path for those filenames that can be parsed - try: + if filename_dict['recognized_filename']: destination_directory = os.path.join( SETTINGS['filesystem'], 'jw{}'.format(filename_dict['program_id']), 'jw{}{}{}'.format(filename_dict['program_id'], filename_dict['observation'], filename_dict['visit'])) - except KeyError: # Some filenames do not have a program_id/observation/visit structure + else: + # Some filenames do not have a program_id/observation/visit structure + # Files that are not recognized by filename_parser will also end up here. + logging.warning((f'While running organize_filesystem(), {src} was not, ' + 'recognized by the filename_parser().')) break # Build complete destination location @@ -92,11 +96,15 @@ def revert_filesystem(): filename_dict = filename_parser(src) # Build destination path for those filenames that can be parsed - try: + if filename_dict['recognized_filename']: destination_directory = os.path.join( SETTINGS['old_filesystem'], 'jw{}'.format(filename_dict['program_id'])) - except KeyError: # Some filenames do not have a program_id/observation/visit structure + else: + # Some filenames do not have a program_id/observation/visit structure + # Filenames not recognized by filename_parser() will also end up here. + logging.warning((f'While running revert_filesystem(), {src} was not, ' + 'recognized by the filename_parser().')) break # Build complete destination location diff --git a/jwql/utils/utils.py b/jwql/utils/utils.py index 9113169f5..d96388d9a 100644 --- a/jwql/utils/utils.py +++ b/jwql/utils/utils.py @@ -31,6 +31,7 @@ import glob import itertools import json +import logging import pyvo as vo import os import re @@ -569,6 +570,9 @@ def filename_parser(filename): # Convert the regex match to a dictionary filename_dict = jwst_file.groupdict() + # Add an entry indicating that the filename was successfully parsed + filename_dict['recognized_filename'] = True + # Add the filename type to that dict filename_dict['filename_type'] = name_match @@ -596,11 +600,9 @@ def filename_parser(filename): # Raise error if unable to parse the filename except AttributeError: - jdox_url = 'https://jwst-docs.stsci.edu/understanding-jwst-data-files/jwst-data-file-naming-conventions' - raise ValueError( - 'Provided file {} does not follow JWST naming conventions. ' - 'See {} for further information.'.format(filename, jdox_url) - ) + filename_dict = {'recognized_filename': False} + logging.exception((f'\nFile; {filename} was not recognized by filename_parser(). Update parser or ' + 'constants.py if it should be recognized.\n')) return filename_dict diff --git a/jwql/website/apps/jwql/archive_database_update.py b/jwql/website/apps/jwql/archive_database_update.py index bb7ffe481..748cde5dc 100755 --- a/jwql/website/apps/jwql/archive_database_update.py +++ b/jwql/website/apps/jwql/archive_database_update.py @@ -117,9 +117,14 @@ def get_updates(update_database): for rootname in all_rootnames: filename_dict = filename_parser(rootname) - # Weed out file types that are not supported by generate_preview_images - if 'stage_3' not in filename_dict['filename_type']: - rootnames.append(rootname) + if filename_dict['recognized_filename']: + # Weed out file types that are not supported by generate_preview_images + if 'stage_3' not in filename_dict['filename_type']: + rootnames.append(rootname) + else: + logging.warning((f'While running get_updates() to update the RootfileInfo tables, {rootname}, ' + 'was not recognized by the filename_parser().')) + pass if len(filenames) > 0: diff --git a/jwql/website/apps/jwql/data_containers.py b/jwql/website/apps/jwql/data_containers.py index a61a43804..3e6b4ff0d 100644 --- a/jwql/website/apps/jwql/data_containers.py +++ b/jwql/website/apps/jwql/data_containers.py @@ -1430,7 +1430,13 @@ def get_image_info(file_root): parsed_fn = filename_parser(filename) # Get suffix information - suffix = parsed_fn['suffix'] + if parsed_fn['recognized_filename']: + suffix = parsed_fn['suffix'] + else: + # If the filename parser does not recognize the file, skip it + logging.warning((f'While running get_image_info() on {filename}, the ' + 'filename_parser() failed to recognize the file pattern.')) + continue # For crf or crfints suffixes, we need to also include the association value # in the suffix, so that preview images can be found later. @@ -1712,11 +1718,14 @@ def get_proposal_info(filepaths): obsnums = [] for fname in files_for_proposal: - try: - obs = filename_parser(fname)['observation'] + file_info = filename_parser(fname) + if file_info['recognized_filename']: + obs = file_info['observation'] obsnums.append(obs) - except KeyError: - pass + else: + logging.warning((f'While running get_proposal_info() for a program {proposal}, {fname} ' + 'was not recognized by the filename_parser().')) + obsnums = sorted(obsnums) observations.extend(obsnums) num_files.append(len(files_for_proposal)) @@ -2152,10 +2161,13 @@ def thumbnails_ajax(inst, proposal, obs_num=None): # Wrap in try/except because level 3 rootnames won't have an observation # number returned by the filename_parser. That's fine, we're not interested # in those files anyway. - try: - all_obs.append(filename_parser(root)['observation']) - except KeyError: - pass + file_info = filename_parser(root) + if file_info['recognized_filename']: + all_obs.append(file_info['observation']) + else: + logging.warning((f'While running thumbnails_ajax() on root {root}, ' + 'filename_parser() failed to recognize the file pattern.')) + obs_list = sorted(list(set(all_obs))) # Get the available files for the instrument @@ -2175,14 +2187,13 @@ def thumbnails_ajax(inst, proposal, obs_num=None): for rootname in rootnames: # Parse filename - try: - filename_dict = filename_parser(rootname) - + filename_dict = filename_parser(rootname) + if filename_dict['recognized_filename']: # Weed out file types that are not supported by generate_preview_images if 'stage_3' in filename_dict['filename_type']: continue - except ValueError: + else: # Temporary workaround for noncompliant files in filesystem filename_dict = {'activity': rootname[17:19], 'detector': rootname[26:], @@ -2193,6 +2204,8 @@ def thumbnails_ajax(inst, proposal, obs_num=None): 'visit': rootname[10:13], 'visit_group': rootname[14:16], 'group_root': rootname[:26]} + logging.warning((f'While running thumbnails_ajax() on rootname {rootname}, ' + 'filename_parser() failed to recognize the file pattern.')) # Get list of available filenames and exposure start times. All files with a given # rootname will have the same exposure start time, so just keep the first. @@ -2333,9 +2346,14 @@ def thumbnails_query_ajax(rootnames): continue # Parse filename - try: - filename_dict = filename_parser(rootname) - except ValueError: + filename_dict = filename_parser(rootname) + + if filename_dict['recognized_filename']: + # Add to list of all exposure groups + exp_groups.add(filename_dict['group_root']) + else: + logging.warning((f'While running thumbnails_query_ajax() on rootname {rootname}, ' + 'filename_parser() failed to recognize the file pattern.')) continue try: @@ -2348,15 +2366,12 @@ def thumbnails_query_ajax(rootnames): pupil_type = "" grating_type = "" - # Add to list of all exposure groups - exp_groups.add(filename_dict['group_root']) - # Get list of available filenames available_files = get_filenames_by_rootname(rootname) # Add data to dictionary data_dict['file_data'][rootname] = {} - data_dict['file_data'][rootname]['inst'] = JWST_INSTRUMENT_NAMES_MIXEDCASE[filename_parser(rootname)['instrument']] + data_dict['file_data'][rootname]['inst'] = JWST_INSTRUMENT_NAMES_MIXEDCASE[filename_dict['instrument']] data_dict['file_data'][rootname]['filename_dict'] = filename_dict data_dict['file_data'][rootname]['available_files'] = available_files root_file_info = RootFileInfo.objects.get(root_name=rootname) @@ -2369,11 +2384,15 @@ def thumbnails_query_ajax(rootnames): data_dict['file_data'][rootname]['pupil'] = pupil_type data_dict['file_data'][rootname]['grating'] = grating_type for filename in available_files: - try: - suffix = filename_parser(filename)['suffix'] + file_info = filename_parser(filename) + if file_info['recognized_filename']: + suffix = file_info['suffix'] data_dict['file_data'][rootname]['suffixes'].append(suffix) - except ValueError: + else: + logging.warning((f'While running thumbnails_query_ajax() on filename {filename}, ' + 'filename_parser() failed to recognize the file pattern.')) continue + data_dict['file_data'][rootname]['thumbnail'] = get_thumbnail_by_rootname(rootname) # Extract information for sorting with dropdown menus diff --git a/jwql/website/apps/jwql/forms.py b/jwql/website/apps/jwql/forms.py index c7a4bdc38..e66bbad04 100644 --- a/jwql/website/apps/jwql/forms.py +++ b/jwql/website/apps/jwql/forms.py @@ -363,10 +363,17 @@ def clean_search(self): if any(map(filename.__contains__, GUIDER_FILENAME_TYPE)): continue else: - instrument = filename_parser(file)['instrument'] - observation = filename_parser(file)['observation'] - all_instruments.append(instrument) - all_observations[instrument].append(observation) + fileinfo = filename_parser(file) + if fileinfo['recognized_filename']: + instrument = fileinfo['instrument'] + observation = fileinfo['observation'] + all_instruments.append(instrument) + all_observations[instrument].append(observation) + else: + # If the filename is not recognized by filename_parser(), skip it. + logging.warning((f'While running FileSearchForm.clean_search() on {file}, ' + 'filename_parser() failed to recognize the file pattern.')) + continue # sort lists so first observation is available when link is clicked. for instrument in all_instruments: @@ -415,11 +422,11 @@ def _search_is_fileroot(self, search): bool Is the search term formatted like a fileroot? """ - - try: - self.fileroot_dict = filename_parser(search) + parsed = filename_parser(search) + if parsed['recognized_filename']: + self.fileroot_dict = parsed return True - except ValueError: + else: return False def redirect_to_files(self):