Merged schema_merge3 to trunk, again; 5 errors and 5 failures remain to be resolved.
Schema loading broken due to apparent changes in data model structure.


git-svn-id: https://aeon.stsci.edu/ssb/svn/crds/trunk@2652 0d8f46c8-9b30-49d0-b470-0f6283dc92e8
[email protected] committed Oct 18, 2015
1 parent cdd820c commit 52fea6e
Showing 134 changed files with 11,872 additions and 175 deletions.
9 changes: 8 additions & 1 deletion crds/bestrefs.py
@@ -145,7 +145,7 @@ def update_headers(self, headers2, only_ids=None):

# Munge for consistent case and value formatting regardless of source
headers2 = { dataset_id :
{ key.upper():utils.condition_value(val) for (key,val) in headers2[dataset_id].items() }
{ key.upper():bestrefs_condition(val) for (key,val) in headers2[dataset_id].items() }
for dataset_id in headers2 if dataset_id in only_ids }

# replace param-by-param, not id-by-id, since headers2[id] may be partial
@@ -179,6 +179,13 @@ def handle_updates(self, all_updates):
new_ref = new_ref.lower()
self.headers[dataset][update.filekind.upper()] = new_ref

def bestrefs_condition(value):
"""Condition header keyword value to normal form, converting NOT FOUND N/A to N/A."""
val = utils.condition_value(value)
if val == "NOT FOUND N/A":
val = "N/A"
return val

# ===================================================================

# FileHeaderGenerator uses a deferred header loading scheme which incrementally reads each header
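As a rough illustration of the new bestrefs_condition() helper above, here is a self-contained sketch; the condition_value() stand-in below only approximates crds.utils.condition_value, which performs fuller normalization:

# Self-contained sketch, not the CRDS implementation: normalize a header value,
# then collapse the composite "NOT FOUND N/A" marker down to plain "N/A".
def condition_value(value):
    # Stand-in for crds.utils.condition_value: upper-case and strip whitespace.
    return str(value).strip().upper()

def bestrefs_condition(value):
    val = condition_value(value)
    if val == "NOT FOUND N/A":
        val = "N/A"
    return val

assert bestrefs_condition(" not found n/a ") == "N/A"
assert bestrefs_condition("n/a") == "N/A"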
127 changes: 86 additions & 41 deletions crds/certify.py
@@ -12,10 +12,12 @@

import numpy as np

import crds
from crds import rmap, log, timestamp, utils, data_file, diff, cmdline, config
from crds import tables
from crds import client
from crds import mapping_parser
from crds import selectors
from crds.exceptions import (MissingKeywordError, IllegalKeywordError, InvalidFormatError, TypeSetupError,
ValidationError)

@@ -175,10 +177,9 @@ def _check_value(self, filename, value):
def _match_value(self, value):
"""Do a literal match of `value` to the values of this tpninfo."""
return value in self._values



class RegexValidator(KeywordValidator):
"""Checks that a value is one of the literal TpnInfo values."""
"""Checks that a value matches TpnInfo values treated as regexes."""
def _match_value(self, value):
if super(RegexValidator, self)._match_value(value):
return True
@@ -198,6 +199,16 @@ def condition(self, value):
chars = '"' + "_".join(chars.split()) + '"'
return chars

def _check_value(self, filename, value):
if selectors.esoteric_key(value):
values = [value]
else:
values = value.split("|")
if len(values) > 1:
self.verbose(filename, value, "is an or'ed parameter matching", values)
for val in values:
super(CharacterValidator, self)._check_value(filename, val)

# ----------------------------------------------------------------------------

class LogicalValidator(KeywordValidator):
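Regarding the new CharacterValidator._check_value() above: values such as "A|B|C" are treated as an or'ed set, and each alternative is checked individually. A simplified sketch of that splitting logic (the real code also defers "esoteric" expressions to selectors.esoteric_key() and reports via the validator's verbose machinery):

# Simplified sketch of the or-bar splitting; names and values are illustrative.
def check_ored_value(value, allowed):
    values = value.split("|")        # "A|B|C" -> ["A", "B", "C"]
    for val in values:
        if val not in allowed:
            raise ValueError("%r is not one of %r" % (val, sorted(allowed)))

check_ored_value("GRATING1|GRATING2", {"GRATING1", "GRATING2", "MIRROR"})  # passes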
@@ -320,6 +331,14 @@ def check_value(self, filename, value):

# ----------------------------------------------------------------------------

class JwstdateValidator(KeywordValidator):
"""Check &JWSTDATE date fields."""
def check_value(self, filename, value):
self.verbose(filename, value)
timestamp.Jwstdate.get_datetime(value)

# ----------------------------------------------------------------------------

class SlashdateValidator(KeywordValidator):
"""Validates &SLASHDATE fields."""
def check_value(self, filename, value):
@@ -370,27 +389,19 @@ def validator(info):

# ============================================================================

def get_validators(filename, observatory):
"""Given a reference file `filename`, return the observatory specific
list of Validators used to check that reference file type.
"""
locator = utils.get_locator_module(observatory)
# Get the cache key for this filetype.
key = locator.reference_name_to_validator_key(filename)
return validators_by_typekey(key, observatory)

@utils.cached
def validators_by_typekey(key, observatory):
"""Load and return the list of validators associated with reference type
validator `key`. Factored out because it is cached on parameters.
"""
locator = utils.get_locator_module(observatory)
# Make and cache Validators for `filename`s reference file type.
validators = [validator(x) for x in locator.get_tpninfos(*key)]
log.verbose("Validators for", repr(key), ":\n", log.PP(validators), verbosity=60)
"""
try:
validators = [validator(x) for x in locator.get_tpninfos(*key)]
log.verbose("Validators for", repr(key), ":\n", log.PP(validators), verbosity=60)
except Exception as exc:
raise RuntimeError("FAILED loading type constraints for " + repr(key) + " with " + repr(exc))
"""
return validators

# ============================================================================
@@ -431,6 +442,10 @@ def basename(self):
@property
def format_name(self):
return repr(self.original_name) if self.original_name else repr(self.basename)

@property
def locator(self):
return utils.get_locator_module(self.observatory)

def log_and_track_error(self, *args, **keys):
"""Output a log error on behalf of `msg`, tracking it for uniqueness if run inside a script."""
@@ -446,6 +461,37 @@ def certify(self):
raise NotImplementedError("Certify is an abstract class.")


def get_validators(self):
"""Given a reference file `filename`, return the observatory specific
list of Validators used to check that reference file type.
"""
# Get the cache key for this filetype.
validators = []
for key in self.locator.reference_name_to_validator_key(self.filename):
validators.extend(validators_by_typekey(key, self.observatory))
parkeys = set(self.get_rmap_parkeys())
validators = [ val for val in validators if val.name in parkeys ]
return validators


def get_corresponding_rmap(self):
"""Return the rmap which corresponds to self.filename under self.context."""
pmap = rmap.get_cached_mapping(self.context, ignore_checksum="warn")
instrument, filekind = pmap.locate.get_file_properties(self.filename)
return pmap.get_imap(instrument).get_rmap(filekind)

def get_rmap_parkeys(self):
"""Determine required parkeys in reference path `refname` according to pipeline
mapping `context`.
"""
if self.context is None:
return []
try:
return self.get_corresponding_rmap().get_required_parkeys()
except Exception as exc:
log.verbose_warning("Failed retrieving required parkeys:", str(exc))
return []
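The new get_corresponding_rmap() / get_rmap_parkeys() methods tie a reference file back to its rmap under the comparison context. Roughly, using the same CRDS calls as above but with hypothetical context and file names, and assuming the CRDS cache is available:

# Illustrative only -- the context and reference names below are made up.
from crds import rmap

pmap = rmap.get_cached_mapping("jwst_0105.pmap", ignore_checksum="warn")
instrument, filekind = pmap.locate.get_file_properties("jwst_miri_flat_0007.fits")
refmap = pmap.get_imap(instrument).get_rmap(filekind)
print(refmap.get_required_parkeys())   # e.g. a tuple of dataset matching keywords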

class ReferenceCertifier(Certifier):
"""Baseclass for most reference file certifier classes.
1. Check simple keywords against TPN files using the reftype's validators.
@@ -462,7 +508,7 @@ def __init__(self, *args, **keys):

def complex_init(self):
"""Can't do this until we at least know the file is loadable."""
self.simple_validators = get_validators(self.filename, self.observatory)
self.simple_validators = self.get_validators()
self.all_column_names = [ val.name for val in self.simple_validators if val.info.keytype == 'C' ]
self.all_simple_names = [ val.name for val in self.simple_validators if val.info.keytype != 'C' ]
self.mode_columns = self.get_mode_column_names()
@@ -471,7 +517,7 @@ def certify(self):
"""Certify `self.filename`, either reporting using log.error() or raising
ValidationError exceptions.
"""
with log.augment_exception("Error loading", self.format_name, exception_class=InvalidFormatError):
with log.augment_exception("Error loading", exception_class=InvalidFormatError):
self.header = self.load()
with log.augment_exception("Error locating constraints for", self.format_name, exception_class=TypeSetupError):
self.complex_init()
@@ -489,7 +535,20 @@ def certify_simple_parameters(self):

def load(self):
"""Load and parse header from self.filename"""
return data_file.get_header(self.filename, observatory=self.observatory, original_name=self.original_name)
header = data_file.get_header(self.filename, observatory=self.observatory, original_name=self.original_name)
if self.context:
r = self.get_corresponding_rmap()
if hasattr(r, "reference_to_dataset"):
# dataset_to_reference = utils.invert_dict(r.reference_to_dataset)
for key, val in header.items():
try:
header[r.reference_to_dataset[key]] = header[key]
except KeyError:
continue
instr = utils.header_to_instrument(header)
for key in crds.INSTRUMENT_KEYWORDS:
header[key] = instr
return header
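The reworked load() above cross-straps keywords: when the corresponding rmap defines a reference_to_dataset translation, each reference-file keyword is duplicated under its dataset-header name, and the instrument keywords are made uniform. A toy sketch of the keyword duplication step (the mapping and header values below are invented, not from a real rmap):

# Toy illustration of reference_to_dataset cross-strapping.
reference_to_dataset = {"DETECTOR": "META.INSTRUMENT.DETECTOR"}
header = {"DETECTOR": "MIRIMAGE", "FILETYPE": "FLAT"}

for key in list(header):
    try:
        header[reference_to_dataset[key]] = header[key]
    except KeyError:
        continue

# header now carries both spellings of the detector keyword:
# {"DETECTOR": "MIRIMAGE", "FILETYPE": "FLAT", "META.INSTRUMENT.DETECTOR": "MIRIMAGE"}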

def dump_provenance(self):
"""Dump out provenance keywords for informational purposes."""
@@ -498,7 +557,7 @@ def dump_provenance(self):
self.all_simple_names + # what's defined in .tpn's, maybe not matched
self.provenance_keys)) # extra project-specific keywords like HISTORY, COMMENT, PEDIGREE
unseen = self._dump_provenance_core(dump_keys)
log.verbose("Potential provenance keywords:", repr(dump_keys), verbosity=60)
log.verbose("Potential provenance keywords:", repr(dump_keys), verbosity=80)
warn_keys = self.provenance_keys
for key in sorted(unseen):
if key in warn_keys:
@@ -531,20 +590,6 @@ def interesting_value(self, value):
return False
return True

def get_rmap_parkeys(self):
"""Determine required parkeys in reference path `refname` according to pipeline
mapping `context`.
"""
if self.context is None:
return []
try:
pmap = rmap.get_cached_mapping(self.context, ignore_checksum="warn")
instrument, filekind = pmap.locate.get_file_properties(self.filename)
return pmap.get_imap(instrument).get_rmap(filekind).get_required_parkeys()
except Exception as exc:
log.verbose_warning("Failed retrieving required parkeys:", str(exc))
return []

def get_mode_column_names(self):
"""Return any column names of `self` defined to be mode columns by the corresponding rmap in `self.context`.
@@ -557,7 +602,7 @@ def get_mode_column_names(self):
mode_columns = []
with self.error_on_exception("Error finding unique row keys for", repr(self.basename)):
instrument, filekind = utils.get_file_properties(self.observatory, self.filename)
mode_columns = utils.get_locator_module(self.observatory).get_row_keys(instrument, filekind)
mode_columns = self.locator.get_row_keys(instrument, filekind)
if mode_columns:
log.info("Table unique row parameters defined as", repr(mode_columns))
else:
@@ -911,7 +956,7 @@ def certify_file(filename, context=None, dump_provenance=False, check_references
original_name: browser-side name of file if any, files
"""
try:
old_flag = log.set_exception_trap(trap_exceptions) # non-reentrant code, no threading
old_flag = log.set_exception_trap(trap_exceptions) # XXX non-reentrant code, no threading

if original_name is None:
original_name = filename
@@ -1081,6 +1126,12 @@ def main(self):
assert (self.args.comparison_reference is None) or not config.is_mapping_spec(self.args.comparison_reference), \
"Specified --comparison-reference file " + repr(self.args.comparison_reference) + " is not a reference."

if self.args.comparison_context and self.args.sync_files:
resolved_context = self.resolve_context(self.args.comparison_context)
self.sync_files([resolved_context])
if self.args.comparison_reference and self.args.sync_files:
self.sync_files([self.args.comparison_reference])

if not self.args.dont_recurse_mappings:
all_files = self.mapping_closure(self.files)
else:
@@ -1100,12 +1151,6 @@ def main(self):
log.info("No comparison context specified or specified as 'none'. No default context for all mappings or mixed types.")
self.args.comparison_context = None

if self.args.comparison_context and self.args.sync_files:
resolved_context = self.resolve_context(self.args.comparison_context)
self.sync_files([resolved_context])
if self.args.comparison_reference and self.args.sync_files:
self.sync_files([self.args.comparison_reference])

certify_files(sorted(all_files),
context=self.resolve_context(self.args.comparison_context),
comparison_reference=self.args.comparison_reference,
11 changes: 9 additions & 2 deletions crds/cmdline.py
@@ -17,6 +17,7 @@

from argparse import RawTextHelpFormatter

import crds
from crds import rmap, log, data_file, heavy_client, config, utils, exceptions
from crds.client import api
from crds import python23
@@ -184,8 +185,14 @@ def observatory(self):

obs = os.environ.get("CRDS_OBSERVATORY", None)
if obs:
self.set_server(obs.lower())

return self.set_server(obs.lower())

url = os.environ.get("CRDS_SERVER_URL", None)
if url is not None:
for obs in crds.ALL_OBSERVATORIES:
if obs in url.lower():
return self.set_server(obs)

files = []
if hasattr(self, "contexts"):
files += self.contexts
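The cmdline.py change above lets Script.observatory fall back to inferring the observatory from CRDS_SERVER_URL when CRDS_OBSERVATORY is unset. A rough sketch of that fallback; crds.ALL_OBSERVATORIES is the constant used in the change, while the URL here is only an example:

# Rough sketch of the URL-based observatory inference.
import os
import crds

os.environ.setdefault("CRDS_SERVER_URL", "https://jwst-crds.stsci.edu")  # example URL

url = os.environ.get("CRDS_SERVER_URL", "")
observatory = next((obs for obs in crds.ALL_OBSERVATORIES if obs in url.lower()), None)
print(observatory)   # 'jwst' for the example URL above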
39 changes: 31 additions & 8 deletions crds/data_file.py
@@ -212,38 +212,48 @@ def get_header(filepath, needed_keys=(), original_name=None, observatory=None):
# A clearer name
get_unconditioned_header = get_header

# ----------------------------------------------------------------------------------------------

def get_data_model_header(filepath, needed_keys=()):
"""Get the header from `filepath` using the jwst data model."""
from jwst_lib import models
with models.open(filepath) as d_model:
flat_dict = d_model.to_flat_dict(include_arrays=False)
with log.augment_exception("JWST Data Model (jwst_lib.models)"):
with models.open(filepath) as d_model:
flat_dict = d_model.to_flat_dict(include_arrays=False)
d_header = sanitize_data_model_dict(flat_dict)
header = reduce_header(filepath, d_header, needed_keys)
d_header = reduce_header(filepath, d_header, needed_keys)
header = cross_strap_header(d_header)
return header

def get_json_header(filepath, needed_keys=()):
"""Return the flattened header associated with a JSON file."""
with open(filepath) as pfile:
header = json.load(pfile)
header = to_simple_types(header)
return reduce_header(filepath, header, needed_keys)
header = reduce_header(filepath, header, needed_keys)
header = cross_strap_header(header)
return header

def get_yaml_header(filepath, needed_keys=()):
"""Return the flattened header associated with a YAML file."""
import yaml
with open(filepath) as pfile:
header = yaml.load(pfile)
header = to_simple_types(header)
return reduce_header(filepath, header, needed_keys)

# ----------------------------------------------------------------------------------------------
header = reduce_header(filepath, header, needed_keys)
header = cross_strap_header(header)
return header

def get_asdf_header(filepath, needed_keys=()):
"""Return the flattened header associated with an ASDF file."""
import pyasdf
with pyasdf.AsdfFile.open(filepath) as handle:
header = to_simple_types(handle.tree)
return reduce_header(filepath, header, needed_keys)
header = reduce_header(filepath, header, needed_keys)
header = cross_strap_header(header)
return header

# ----------------------------------------------------------------------------------------------

def to_simple_types(tree):
"""Convert an ASDF tree structure to a flat dictionary of simple types with dotted path tree keys."""
@@ -268,6 +278,19 @@ def simple_type(value):
rval = "SUPRESSED_NONSTD_TYPE: " + repr(str(value.__class__.__name__))
return rval

def cross_strap_header(header):
"""Foreach DM keyword in header, add the corresponding FITS keyword, and vice versa."""
from crds.jwst import schema
crossed = dict(header)
for key, val in header.items():
fitskey = schema.dm_to_fits(key)
if fitskey is not None and fitskey not in crossed:
crossed[fitskey] = val
dmkey = schema.fits_to_dm(key)
if dmkey is not None and dmkey not in crossed:
crossed[dmkey] = val
return crossed

# ----------------------------------------------------------------------------------------------

DUPLICATES_OK = ["COMMENT", "HISTORY", "NAXIS"]
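The new cross_strap_header() ensures every header can be addressed by both its FITS keyword and its data-model (DM) dotted path. A toy demonstration with an invented two-entry translation table; in the real code the mapping comes from crds.jwst.schema.dm_to_fits / fits_to_dm, which are derived from the JWST data model schema:

# Toy cross-strapping demo; DM_TO_FITS is invented for illustration only.
DM_TO_FITS = {"META.INSTRUMENT.DETECTOR": "DETECTOR"}
FITS_TO_DM = {v: k for k, v in DM_TO_FITS.items()}

def cross_strap_header(header):
    crossed = dict(header)
    for key, val in header.items():
        fitskey = DM_TO_FITS.get(key)
        if fitskey is not None and fitskey not in crossed:
            crossed[fitskey] = val
        dmkey = FITS_TO_DM.get(key)
        if dmkey is not None and dmkey not in crossed:
            crossed[dmkey] = val
    return crossed

print(cross_strap_header({"META.INSTRUMENT.DETECTOR": "NRCA1"}))
# {'META.INSTRUMENT.DETECTOR': 'NRCA1', 'DETECTOR': 'NRCA1'}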