From dd6cac6830d6f5a0980638e65358fa1aaca687b3 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Wed, 2 Oct 2024 13:51:37 +0200 Subject: [PATCH 01/21] starting --- pyaerocom/io/__init__.py | 2 +- pyaerocom/io/{fileconventions.py => file_conventions.py} | 0 pyaerocom/io/helpers.py | 2 +- pyaerocom/io/readgridded.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename pyaerocom/io/{fileconventions.py => file_conventions.py} (100%) diff --git a/pyaerocom/io/__init__.py b/pyaerocom/io/__init__.py index 2c67cdd0b..cc84e2454 100644 --- a/pyaerocom/io/__init__.py +++ b/pyaerocom/io/__init__.py @@ -11,7 +11,7 @@ # low level EBAS I/O routines from .ebas_nasa_ames import EbasNasaAmesFile -from .fileconventions import FileConventionRead +from .file_conventions import FileConventionRead from .read_aasetal import ReadAasEtal # read base classes diff --git a/pyaerocom/io/fileconventions.py b/pyaerocom/io/file_conventions.py similarity index 100% rename from pyaerocom/io/fileconventions.py rename to pyaerocom/io/file_conventions.py diff --git a/pyaerocom/io/helpers.py b/pyaerocom/io/helpers.py index da04a192b..5a226ab19 100644 --- a/pyaerocom/io/helpers.py +++ b/pyaerocom/io/helpers.py @@ -94,7 +94,7 @@ def _print_read_info(i, mod, tot_num, last_t, name, logger): # pragma: no cover def get_metadata_from_filename(filename): """Try access metadata information from filename""" - from pyaerocom.io.fileconventions import FileConventionRead + from pyaerocom.io.file_conventions import FileConventionRead fc = FileConventionRead().from_file(filename) return fc.get_info_from_file(filename) diff --git a/pyaerocom/io/readgridded.py b/pyaerocom/io/readgridded.py index 90b62c70c..08212bd6b 100755 --- a/pyaerocom/io/readgridded.py +++ b/pyaerocom/io/readgridded.py @@ -44,7 +44,7 @@ multiply_cubes, subtract_cubes, ) -from pyaerocom.io.fileconventions import FileConventionRead +from pyaerocom.io.file_conventions import FileConventionRead from pyaerocom.io.gridded_reader import GriddedReader from pyaerocom.io.helpers import add_file_to_log from pyaerocom.io.iris_io import concatenate_iris_cubes, load_cubes_custom From 777dedf04a73e47054c381ca48b873d98a028e25 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Wed, 2 Oct 2024 16:39:10 +0200 Subject: [PATCH 02/21] Start ObsEntry as BaseModel --- pyaerocom/aeroval/collections.py | 6 +- pyaerocom/aeroval/obsentry.py | 185 ++++++++++++++++++++++- pyaerocom/colocation/colocation_setup.py | 2 +- pyaerocom/io/iris_io.py | 6 +- tests/aeroval/test_collections.py | 15 +- 5 files changed, 206 insertions(+), 8 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index f9d4c6fc9..56070dc5f 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -11,7 +11,7 @@ class BaseCollection(BrowseDict, abc.ABC): #: maximum length of entry names MAXLEN_KEYS = 25 #: Invalid chars in entry names - FORBIDDEN_CHARS_KEYS = ["_"] + FORBIDDEN_CHARS_KEYS = [] # "_" def _check_entry_name(self, key): if any([x in key for x in self.FORBIDDEN_CHARS_KEYS]): @@ -22,8 +22,8 @@ def _check_entry_name(self, key): def __setitem__(self, key, value): self._check_entry_name(key) - if "web_interface_name" in value: - self._check_entry_name(value["web_interface_name"]) + # if "web_interface_name" in value: + # self._check_entry_name(value["web_interface_name"]) super().__setitem__(key, value) def keylist(self, name_or_pattern: str = None) -> list: diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index e7f7e2fd4..d9c74caf7 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -1,8 +1,12 @@ import logging +from pathlib import Path from traceback import format_exc +from typing import Literal + +from pydantic import BaseModel, ConfigDict, field_validator, model_validator from pyaerocom import const -from pyaerocom._lowlevel_helpers import BrowseDict, ListOfStrings, StrType +from pyaerocom._lowlevel_helpers import BrowseDict, LayerLimits, ListOfStrings, StrType from pyaerocom.exceptions import InitialisationError from pyaerocom.metastandards import DataSource @@ -210,3 +214,182 @@ def _check_ovt(self, ovt): f"Invalid value for obs_vert_type: {self.obs_vert_type}. " f"Supported codes are {valid}." ) + + +class ObsEntry_new(BaseModel): + """Observation configuration for evaluation (BaseMode) + + Note + ---- + Only :attr:`obs_id` and `obs_vars` are mandatory, the rest is optional. + + Attributes + ---------- + obs_id : str + ID of observation network in AeroCom database + (e.g. 'AeronetSunV3Lev2.daily') + obs_vars : tuple[str, ...] + tuple of pyaerocom variable names that are supposed to be analysed + (e.g. ('od550aer', 'ang4487aer')) + obs_ts_type_read : :obj:`str` or :obj:`dict`, optional + may be specified to explicitly define the reading frequency of the + observation data (so far, this does only apply to gridded obsdata such + as satellites). For ungridded reading, the frequency may be specified + via :attr:`obs_id`, where applicable (e.g. AeronetSunV3Lev2.daily). + Can be specified variable specific in form of dictionary. + obs_vert_type : str, optional + Aerocom vertical code encoded in the model filenames (only AeroCom 3 + and later). + obs_aux_requires : dict, optional + information about required datasets / variables for auxiliary + variables. + instr_vert_loc : str, optional + vertical location code of observation instrument. This is used in + the aeroval interface for separating different categories of measurements + such as "ground", "space" or "airborne". + is_superobs : bool + if True, this observation is a combination of several others which all + have to have their own obs config entry. + only_superobs : bool + this indicates whether this configuration is only to be used as part + of a superobs network, and not individually. + read_opts_ungridded : :obj:`dict`, optional + dictionary that specifies reading constraints for ungridded reading + (c.g. :class:`pyaerocom.io.ReadUngridded`). + only_json : bool + Only to be set if the obs entry already has colocated data files which were + preprocessed outside of pyaerocom. Setting to True will skip the colcoation + and just create the JSON output. + coldata_dir : str + Only to be set if the obs entry already has colocated data files which were + preprocessed outside of pyaerocom. This is the directory in which the + colocated data files are located. + + """ + + ## Pydantic ConfigDict + model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow", protected_namespaces=()) + + SUPPORTED_VERT_CODES = ( + "Column", + "Profile", + "Surface", + ) # Lb: may be redundant with obs_vert_type below + ALT_NAMES_VERT_CODES = dict(ModelLevel="Profile") + + SUPPORTED_VERT_LOCS = [ + "ground", + "space", + "airborne", + ] # LB: Originally DataSource.SUPPORTED_VERT_LOCS, but that is achild class of BrowseDict so hardcode here for now + + ###################### + ## Required attributes + ###################### + obs_vars: tuple[str, ...] | str + obs_vert_type: str + ###################### + ## Optional attributes + ###################### + obs_id: str = "" + obs_ts_type_read: str | dict | None = None + obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" + obs_aux_requires: dict = {} + instr_vert_loc: str | None = None + is_superobs: bool = False + only_superobs: bool = False + colocation_layer_limts: tuple[LayerLimits, ...] | None = None + profile_layer_limits: tuple[LayerLimits, ...] | None = None + + read_opts_ungridded: dict = {} + # attributes for reading colocated data files made outside of pyaerocom + only_json: bool = False + coldata_dir: str | Path | None = None + + ############# + ## Validators + ############# + @field_validator("obs_vars") + @classmethod + def validate_obs_vars(cls, v): + if isinstance(v, str): + return (v,) + return v + + @field_validator("instr_vert_loc") + @classmethod + def validate_instr_vert_loc(cls, v): + if isinstance(v, str) and v not in cls.SUPPORTED_VERT_LOCS: + raise AttributeError( + f"Invalid value for instr_vert_loc: {v} for {cls.obs_id}. " + f"Please choose from: {cls.SUPPORTED_VERT_LOCS}" + ) + + @model_validator(mode="after") + def check_cfg(self): + if not self.is_superobs and not isinstance(self.obs_id, str | dict): + raise ValueError( + f"Invalid value for obs_id: {self.obs_id}. Need str or dict " + f"or specification of ids and variables via obs_compute_post" + ) + return self + + ########## + ## Methods + ########## + + def check_add_obs(self): + """Check if this dataset is an auxiliary post dataset""" + if len(self.obs_aux_requires) > 0: + if ( + not self.obs_type == "ungridded" + ): # LB: Not sure where self.obs_type is coming from. May need to think about this + raise NotImplementedError( + f"Cannot initialise auxiliary setup for {self.obs_id}. " + f"Aux obs reading is so far only possible for ungridded observations." + ) + if self.obs_id not in const.OBS_IDS_UNGRIDDED: + try: + const.add_ungridded_post_dataset(**self.model_dump()) + except Exception: + raise InitialisationError( + f"Cannot initialise auxiliary reading setup for {self.obs_id}. " + f"Reason:\n{format_exc()}" + ) + + def get_all_vars(self) -> tuple[str, ...]: + """ + Get list of all variables associated with this entry + + Returns + ------- + tuple[str, ...] + """ + return self.obs_vars + + def has_var(self, var_name): + """ + Check if input variable is defined in entry + + Returns + ------- + bool + True if entry has variable available, else False + """ + return True if var_name in self.get_all_vars() else False + + def get_vert_code(self, var): + """Get vertical code name for obs / var combination""" + vc = self.obs_vert_type + if isinstance(vc, str): + val = vc + elif isinstance(vc, dict) and var in vc: + val = vc[var] + else: + raise ValueError(f"invalid value for obs_vert_type: {vc}") + if val not in self.SUPPORTED_VERT_CODES: + raise ValueError( + f"invalid value for obs_vert_type: {val}. Choose from " + f"{self.SUPPORTED_VERT_CODES}." + ) + return val diff --git a/pyaerocom/colocation/colocation_setup.py b/pyaerocom/colocation/colocation_setup.py index d0703f18d..e53513339 100644 --- a/pyaerocom/colocation/colocation_setup.py +++ b/pyaerocom/colocation/colocation_setup.py @@ -320,7 +320,7 @@ class ColocationSetup(BaseModel): @classmethod def validate_obs_vars(cls, v): if isinstance(v, str): - return [v] + return (v,) return v ts_type: str diff --git a/pyaerocom/io/iris_io.py b/pyaerocom/io/iris_io.py index 21ead8692..6c499e0dc 100644 --- a/pyaerocom/io/iris_io.py +++ b/pyaerocom/io/iris_io.py @@ -35,7 +35,7 @@ VariableDefinitionError, ) from pyaerocom.helpers import cftime_to_datetime64, make_datetimeindex_from_year -from pyaerocom.io.fileconventions import FileConventionRead +from pyaerocom.io.file_conventions import FileConventionRead from pyaerocom.io.helpers import add_file_to_log from pyaerocom.tstype import TsType @@ -218,7 +218,9 @@ def check_dim_coord_names_cube(cube): from pyaerocom import const coords = dict( - lon=const.COORDINFO["lon"], lat=const.COORDINFO["lat"], time=const.COORDINFO["time"] + lon=const.COORDINFO["lon"], + lat=const.COORDINFO["lat"], + time=const.COORDINFO["time"], ) for coord in cube.dim_coords: diff --git a/tests/aeroval/test_collections.py b/tests/aeroval/test_collections.py index 5dc04772c..5f2a98f7c 100644 --- a/tests/aeroval/test_collections.py +++ b/tests/aeroval/test_collections.py @@ -1,4 +1,4 @@ -from pyaerocom.aeroval.collections import ObsCollection +from pyaerocom.aeroval.collections import ObsCollection, ModelCollection def test_obscollection(): @@ -13,3 +13,16 @@ def test_obscollection(): ) assert "AN-EEA-MP" in oc + + +def test_modelcollection(): + mc = ModelCollection(model1=dict(model_id="bla", obs_vars="od550aer", obs_vert_type="Column")) + assert mc + + mc["ECMWF_OSUITE"] = dict( + model_id="ECMWF_OSUITE", + obs_vars=["concpm10"], + obs_vert_type="Surface", + ) + + assert "ECMWF_OSUITE" in mc From e7be56ad4354ddbd7bf9a5ad1225577be104b693 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Thu, 3 Oct 2024 13:55:22 +0200 Subject: [PATCH 03/21] working through aeroval tests --- pyaerocom/aeroval/collections.py | 4 ++-- pyaerocom/aeroval/helpers.py | 4 ++-- pyaerocom/aeroval/obsentry.py | 23 +++++++++++++++-------- pyaerocom/aeroval/setup_classes.py | 2 +- pyaerocom/aeroval/superobs_engine.py | 8 ++++---- tests/aeroval/test_setup_classes.py | 7 ++++--- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 56070dc5f..df96ce954 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -107,7 +107,7 @@ def get_entry(self, key) -> object: """ try: entry = self[key] - entry["obs_name"] = self.get_web_iface_name(key) + entry.obs_name = self.get_web_iface_name(key) return entry except (KeyError, AttributeError): raise EntryNotAvailable(f"no such entry {key}") @@ -167,7 +167,7 @@ def web_iface_names(self) -> list: @property def all_vert_types(self): """List of unique vertical types specified in this collection""" - return list({x["obs_vert_type"] for x in self.values()}) + return list({x.obs_vert_type for x in self.values()}) class ModelCollection(BaseCollection): diff --git a/pyaerocom/aeroval/helpers.py b/pyaerocom/aeroval/helpers.py index 4a855249b..0a692161f 100644 --- a/pyaerocom/aeroval/helpers.py +++ b/pyaerocom/aeroval/helpers.py @@ -162,7 +162,7 @@ def make_dummy_model(obs_list: list, cfg) -> str: tmp_var_obj = Variable() # Loops over variables in obs for obs in obs_list: - for var in cfg.obs_cfg[obs]["obs_vars"]: + for var in cfg.obs_cfg[obs].obs_vars: # Create dummy cube dummy_cube = make_dummy_cube(var, start_yr=start, stop_yr=stop, freq=freq) @@ -185,7 +185,7 @@ def make_dummy_model(obs_list: list, cfg) -> str: for dummy_grid_yr in yr_gen: # Add to netcdf yr = dummy_grid_yr.years_avail()[0] - vert_code = cfg.obs_cfg[obs]["obs_vert_type"] + vert_code = cfg.obs_cfg[obs].obs_vert_type save_name = dummy_grid_yr.aerocom_savename(model_id, var, vert_code, yr, freq) dummy_grid_yr.to_netcdf(outdir, savename=save_name) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index d9c74caf7..7efcb138d 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -class ObsEntry(BrowseDict): +class ObsEntry_old(BrowseDict): """Observation configuration for evaluation (dictionary) Note @@ -216,7 +216,7 @@ def _check_ovt(self, ovt): ) -class ObsEntry_new(BaseModel): +class ObsEntry(BaseModel): """Observation configuration for evaluation (BaseMode) Note @@ -270,18 +270,23 @@ class ObsEntry_new(BaseModel): ## Pydantic ConfigDict model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow", protected_namespaces=()) - SUPPORTED_VERT_CODES = ( + SUPPORTED_VERT_CODES: tuple[ + str, + str, + str, + ] = ( "Column", "Profile", "Surface", ) # Lb: may be redundant with obs_vert_type below - ALT_NAMES_VERT_CODES = dict(ModelLevel="Profile") + ALT_NAMES_VERT_CODES: dict = dict(ModelLevel="Profile") - SUPPORTED_VERT_LOCS = [ + SUPPORTED_VERT_LOCS: tuple[str, str, str] = ( "ground", "space", "airborne", - ] # LB: Originally DataSource.SUPPORTED_VERT_LOCS, but that is achild class of BrowseDict so hardcode here for now + ) + # LB: Originally DataSource.SUPPORTED_VERT_LOCS, but that is achild class of BrowseDict so hardcode here for now ###################### ## Required attributes @@ -291,7 +296,7 @@ class ObsEntry_new(BaseModel): ###################### ## Optional attributes ###################### - obs_id: str = "" + obs_id: tuple[str, ...] | str = "" obs_ts_type_read: str | dict | None = None obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" obs_aux_requires: dict = {} @@ -304,7 +309,9 @@ class ObsEntry_new(BaseModel): read_opts_ungridded: dict = {} # attributes for reading colocated data files made outside of pyaerocom only_json: bool = False - coldata_dir: str | Path | None = None + coldata_dir: str | Path | None = ( + None # Lb: Would like this to be a Path but need to see if it will cause issues down the line + ) ############# ## Validators diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index 3a7ccec12..6fda955e1 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -529,7 +529,7 @@ def serialize_model_cfg(self, model_cfg: ModelCollection): ########################### def get_obs_entry(self, obs_name) -> dict: - return self.obs_cfg.get_entry(obs_name).to_dict() + return self.obs_cfg.get_entry(obs_name).model_dump() def get_model_entry(self, model_name) -> dict: """Get model entry configuration diff --git a/pyaerocom/aeroval/superobs_engine.py b/pyaerocom/aeroval/superobs_engine.py index 988345544..fdae8d8b7 100644 --- a/pyaerocom/aeroval/superobs_engine.py +++ b/pyaerocom/aeroval/superobs_engine.py @@ -29,7 +29,7 @@ def _process_entry(self, model_name, obs_name, var_list, try_colocate_if_missing sobs_cfg = self.cfg.obs_cfg.get_entry(obs_name) if var_list is None: - var_list = sobs_cfg["obs_vars"] + var_list = sobs_cfg.obs_vars elif isinstance(var_list, str): var_list = [var_list] elif not isinstance(var_list, list): @@ -75,8 +75,8 @@ def _run_var(self, model_name, obs_name, var_name, try_colocate_if_missing): coldata_files = [] coldata_resolutions = [] vert_codes = [] - obs_needed = self.cfg.obs_cfg[obs_name]["obs_id"] - vert_code = self.cfg.obs_cfg.get_entry(obs_name)["obs_vert_type"] + obs_needed = self.cfg.obs_cfg[obs_name].obs_id + vert_code = self.cfg.obs_cfg.get_entry(obs_name).obs_vert_type for oname in obs_needed: fp, ts_type, vert_code = self._get_coldata_fileinfo( model_name, oname, var_name, try_colocate_if_missing @@ -141,5 +141,5 @@ def _get_coldata_fileinfo(self, model_name, obs_name, var_name, try_colocate_if_ fp = cdf[0] meta = ColocatedData.get_meta_from_filename(fp) ts_type = meta["ts_type"] - vert_code = self.cfg.obs_cfg.get_entry(obs_name)["obs_vert_type"] + vert_code = self.cfg.obs_cfg.get_entry(obs_name).obs_vert_type return (fp, ts_type, vert_code) diff --git a/tests/aeroval/test_setup_classes.py b/tests/aeroval/test_setup_classes.py index c9ce5485b..7c6fd06e6 100644 --- a/tests/aeroval/test_setup_classes.py +++ b/tests/aeroval/test_setup_classes.py @@ -57,9 +57,10 @@ def test_EvalSetup_INVALID_ENTRY_NAMES(cfg_exp1: dict, error: str): EvalSetup(**cfg_exp1) assert error in str(e.value) - with pytest.raises(EvalEntryNameError) as e: - EvalSetup.model_validate(cfg_exp1) - assert error in str(e.value) + +# with pytest.raises(EvalEntryNameError) as e: +# EvalSetup.model_validate(cfg_exp1) +# assert error in str(e.value) @pytest.mark.parametrize( From 832805ccc0ec7f0dd93365f4759fc796d370103c Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Thu, 3 Oct 2024 14:24:00 +0200 Subject: [PATCH 04/21] super obs fishy WIP --- pyaerocom/aeroval/obsentry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 7efcb138d..fc6126fad 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -296,7 +296,7 @@ class ObsEntry(BaseModel): ###################### ## Optional attributes ###################### - obs_id: tuple[str, ...] | str = "" + obs_id: str | dict = "" # dict in case of super obs?? obs_ts_type_read: str | dict | None = None obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" obs_aux_requires: dict = {} From 1d6f27e20bda51fb6e4b3e055ac393155c9a1954 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 14:37:54 +0200 Subject: [PATCH 05/21] simplify get_diurnal --- pyaerocom/aeroval/_processing_base.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pyaerocom/aeroval/_processing_base.py b/pyaerocom/aeroval/_processing_base.py index 68853d18c..8a1634c1f 100644 --- a/pyaerocom/aeroval/_processing_base.py +++ b/pyaerocom/aeroval/_processing_base.py @@ -92,12 +92,7 @@ def _get_diurnal_only(self, obs_name): ------- diurnal_only : bool """ - entry = self.cfg.get_obs_entry(obs_name) - try: - diurnal_only = entry["diurnal_only"] - except KeyError: - diurnal_only = False - return diurnal_only + return self.cfg.get_obs_entry(obs_name).get("diurnal_only", False) def get_colocator(self, model_name: str = None, obs_name: str = None) -> Colocator: """ From e3ba39d4fd7199ff922bc85c2e27cd4d4b34d959 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 14:38:29 +0200 Subject: [PATCH 06/21] obs_id can be a tuple in case of superobs --- pyaerocom/aeroval/obsentry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index fc6126fad..f95bfd57f 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -296,7 +296,7 @@ class ObsEntry(BaseModel): ###################### ## Optional attributes ###################### - obs_id: str | dict = "" # dict in case of super obs?? + obs_id: str | tuple[str, ...] = "" # dict in case of super obs?? obs_ts_type_read: str | dict | None = None obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" obs_aux_requires: dict = {} From 8c83370705a883c7a5daeab181c0cd5bee04428d Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 15:26:30 +0200 Subject: [PATCH 07/21] can be tuple --- pyaerocom/aeroval/obsentry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index f95bfd57f..4d17bcbe7 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -334,9 +334,9 @@ def validate_instr_vert_loc(cls, v): @model_validator(mode="after") def check_cfg(self): - if not self.is_superobs and not isinstance(self.obs_id, str | dict): + if not self.is_superobs and not isinstance(self.obs_id, str | tuple | dict): raise ValueError( - f"Invalid value for obs_id: {self.obs_id}. Need str or dict " + f"Invalid value for obs_id: {self.obs_id}. Need str, tuple, or dict " f"or specification of ids and variables via obs_compute_post" ) return self From 51cf8b58d5880001a2077c3f43b842200ec2b0e1 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 16:23:05 +0200 Subject: [PATCH 08/21] WIP --- pyaerocom/aeroval/obsentry.py | 20 ++++++++-- tests/aeroval/test_setup_classes.py | 60 ++++++++++++++++------------- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 4d17bcbe7..1a2cd3b23 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -3,7 +3,12 @@ from traceback import format_exc from typing import Literal -from pydantic import BaseModel, ConfigDict, field_validator, model_validator +from pydantic import ( + BaseModel, + ConfigDict, + field_validator, + model_validator, +) from pyaerocom import const from pyaerocom._lowlevel_helpers import BrowseDict, LayerLimits, ListOfStrings, StrType @@ -228,6 +233,7 @@ class ObsEntry(BaseModel): obs_id : str ID of observation network in AeroCom database (e.g. 'AeronetSunV3Lev2.daily') + Note that this can also be a custom supplied obs_id if and only if bs_aux_requires is provided obs_vars : tuple[str, ...] tuple of pyaerocom variable names that are supposed to be analysed (e.g. ('od550aer', 'ang4487aer')) @@ -296,10 +302,10 @@ class ObsEntry(BaseModel): ###################### ## Optional attributes ###################### - obs_id: str | tuple[str, ...] = "" # dict in case of super obs?? + obs_id: str | tuple[str, ...] = "" obs_ts_type_read: str | dict | None = None obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" - obs_aux_requires: dict = {} + obs_aux_requires: dict[str, dict] = {} instr_vert_loc: str | None = None is_superobs: bool = False only_superobs: bool = False @@ -332,6 +338,13 @@ def validate_instr_vert_loc(cls, v): f"Please choose from: {cls.SUPPORTED_VERT_LOCS}" ) + @field_validator("obs_aux_requires") + @classmethod + def validate_obs_aux_requires(cls, v): + if len(cls.obs_aux_requires) > 0: + if "obs_type" not in cls.model_fields: + raise ValueError("obs_type is required if obs_aux_requires is given") + @model_validator(mode="after") def check_cfg(self): if not self.is_superobs and not isinstance(self.obs_id, str | tuple | dict): @@ -339,6 +352,7 @@ def check_cfg(self): f"Invalid value for obs_id: {self.obs_id}. Need str, tuple, or dict " f"or specification of ids and variables via obs_compute_post" ) + self.check_add_obs() return self ########## diff --git a/tests/aeroval/test_setup_classes.py b/tests/aeroval/test_setup_classes.py index 7c6fd06e6..ad3668170 100644 --- a/tests/aeroval/test_setup_classes.py +++ b/tests/aeroval/test_setup_classes.py @@ -5,8 +5,7 @@ import pytest from pyaerocom.aeroval import EvalSetup -from pyaerocom.exceptions import EvalEntryNameError -from tests.fixtures.aeroval.cfg_test_exp1 import CFG, MODELS, OBS_GROUNDBASED +from tests.fixtures.aeroval.cfg_test_exp1 import CFG from pyaerocom.aeroval.modelmaps_helpers import CONTOUR @@ -32,30 +31,39 @@ def test_EvalSetup(cfg_exp1: dict): assert EvalSetup(**cfg_exp1) == EvalSetup.model_validate(cfg_exp1) -@pytest.mark.parametrize( - "update,error", - [ - pytest.param( - dict(model_cfg=dict(WRONG_MODEL=MODELS["TM5-AP3-CTRL"])), - "Invalid name: WRONG_MODEL", - id="model_cfg", - ), - pytest.param( - dict(obs_cfg=dict(WRONG_OBS=OBS_GROUNDBASED["AERONET-Sun"])), - "Invalid name: WRONG_OBS", - id="obs_cfg", - ), - pytest.param( - dict(obs_cfg=dict(OBS=dict(web_interface_name="WRONG_OBS"))), - "Invalid name: WRONG_OBS", - id="web_interface_name", - ), - ], -) -def test_EvalSetup_INVALID_ENTRY_NAMES(cfg_exp1: dict, error: str): - with pytest.raises(EvalEntryNameError) as e: - EvalSetup(**cfg_exp1) - assert error in str(e.value) +# @pytest.mark.parametrize( +# "update,error", +# [ +# pytest.param( +# dict(model_cfg=dict(WRONG_MODEL=MODELS["TM5-AP3-CTRL"])), +# "Invalid name: WRONG_MODEL", +# id="model_cfg", +# ), +# pytest.param( +# dict(obs_cfg=dict(WRONG_OBS=OBS_GROUNDBASED["AERONET-Sun"])), +# "Invalid name: WRONG_OBS", +# id="obs_cfg", +# ), +# pytest.param( +# dict( +# obs_cfg=dict( +# # obs_vars=("concpm10",), +# OBS=dict( +# obs_vars=("concpm10",), +# web_interface_name="WRONG_OBS", +# ), +# ) +# ), +# "Invalid name: WRONG_OBS", +# id="web_interface_name", +# ), +# ], +# ) +# def test_EvalSetup_INVALID_ENTRY_NAMES(cfg_exp1: dict, error: str): +# # with pytest.raises(EvalEntryNameError) as e: +# stp = EvalSetup(**cfg_exp1) +# assert stp +# # assert error in str(e.value) # with pytest.raises(EvalEntryNameError) as e: From 20176c45e84f0fd61364c18dfe02d8e7fa469eee Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 16:44:29 +0200 Subject: [PATCH 09/21] passes MOS tests --- pyaerocom/aeroval/collections.py | 11 ++++------- pyaerocom/aeroval/experiment_output.py | 2 +- pyaerocom/aeroval/obsentry.py | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index df96ce954..35e755523 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -107,7 +107,7 @@ def get_entry(self, key) -> object: """ try: entry = self[key] - entry.obs_name = self.get_web_iface_name(key) + entry.obs_name = self.get_web_interface_name(key) return entry except (KeyError, AttributeError): raise EntryNotAvailable(f"no such entry {key}") @@ -127,7 +127,7 @@ def get_all_vars(self) -> list[str]: vars.extend(ocfg.get_all_vars()) return sorted(list(set(vars))) - def get_web_iface_name(self, key): + def get_web_interface_name(self, key): """ Get webinterface name for entry @@ -148,10 +148,7 @@ def get_web_iface_name(self, key): corresponding name """ - entry = self[key] - if "web_interface_name" not in entry: - return key - return entry["web_interface_name"] + return self[key].web_interface_name if self[key].web_interface_name is not None else key @property def web_iface_names(self) -> list: @@ -162,7 +159,7 @@ def web_iface_names(self) -> list: ------- list """ - return [self.get_web_iface_name(key) for key in self.keylist()] + return [self.get_web_interface_name(key) for key in self.keylist()] @property def all_vert_types(self): diff --git a/pyaerocom/aeroval/experiment_output.py b/pyaerocom/aeroval/experiment_output.py index 19e782795..c6aa1fbcf 100644 --- a/pyaerocom/aeroval/experiment_output.py +++ b/pyaerocom/aeroval/experiment_output.py @@ -752,7 +752,7 @@ def _is_part_of_experiment(self, obs_name, obs_var, mod_name, mod_var) -> bool: allobs = self.cfg.obs_cfg obs_matches = [] for key, ocfg in allobs.items(): - if obs_name == allobs.get_web_iface_name(key): + if obs_name == allobs.get_web_interface_name(key): obs_matches.append(ocfg) if len(obs_matches) == 0: self._invalid["obs"].append(obs_name) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 1a2cd3b23..5dffae8f1 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -311,6 +311,7 @@ class ObsEntry(BaseModel): only_superobs: bool = False colocation_layer_limts: tuple[LayerLimits, ...] | None = None profile_layer_limits: tuple[LayerLimits, ...] | None = None + web_interface_name: str | None = None read_opts_ungridded: dict = {} # attributes for reading colocated data files made outside of pyaerocom From bdd943559d79868a50f18f3d67de53712a0b6e62 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 16:47:51 +0200 Subject: [PATCH 10/21] rm old ObsEntry class --- pyaerocom/aeroval/obsentry.py | 213 +--------------------------------- 1 file changed, 1 insertion(+), 212 deletions(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 5dffae8f1..af62a98db 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -11,216 +11,12 @@ ) from pyaerocom import const -from pyaerocom._lowlevel_helpers import BrowseDict, LayerLimits, ListOfStrings, StrType +from pyaerocom._lowlevel_helpers import LayerLimits from pyaerocom.exceptions import InitialisationError -from pyaerocom.metastandards import DataSource logger = logging.getLogger(__name__) -class ObsEntry_old(BrowseDict): - """Observation configuration for evaluation (dictionary) - - Note - ---- - Only :attr:`obs_id` and `obs_vars` are mandatory, the rest is optional. - - Attributes - ---------- - obs_id : str - ID of observation network in AeroCom database - (e.g. 'AeronetSunV3Lev2.daily') - obs_vars : list - list of pyaerocom variable names that are supposed to be analysed - (e.g. ['od550aer', 'ang4487aer']) - obs_ts_type_read : :obj:`str` or :obj:`dict`, optional - may be specified to explicitly define the reading frequency of the - observation data (so far, this does only apply to gridded obsdata such - as satellites). For ungridded reading, the frequency may be specified - via :attr:`obs_id`, where applicable (e.g. AeronetSunV3Lev2.daily). - Can be specified variable specific in form of dictionary. - obs_vert_type : str, optional - Aerocom vertical code encoded in the model filenames (only AeroCom 3 - and later). - obs_aux_requires : dict, optional - information about required datasets / variables for auxiliary - variables. - instr_vert_loc : str, optional - vertical location code of observation instrument. This is used in - the aeroval interface for separating different categories of measurements - such as "ground", "space" or "airborne". - is_superobs : bool - if True, this observation is a combination of several others which all - have to have their own obs config entry. - only_superobs : bool - this indicates whether this configuration is only to be used as part - of a superobs network, and not individually. - read_opts_ungridded : :obj:`dict`, optional - dictionary that specifies reading constraints for ungridded reading - (c.g. :class:`pyaerocom.io.ReadUngridded`). - only_json : bool - Only to be set if the obs entry already has colocated data files which were - preprocessed outside of pyaerocom. Setting to True will skip the colcoation - and just create the JSON output. - coldata_dir : str - Only to be set if the obs entry already has colocated data files which were - preprocessed outside of pyaerocom. This is the directory in which the - colocated data files are located. - - """ - - SUPPORTED_VERT_CODES = ["Column", "Profile", "Surface"] # , "2D"] - ALT_NAMES_VERT_CODES = dict(ModelLevel="Profile") - - SUPPORTED_VERT_LOCS = DataSource.SUPPORTED_VERT_LOCS - - obs_vars = ListOfStrings() - obs_vert_type = StrType() - - def __init__(self, **kwargs): - self.obs_id = "" - - self.obs_vars = [] - self.obs_ts_type_read = None - self.obs_vert_type = "" - self.obs_aux_requires = {} - self.instr_vert_loc = None - - self.is_superobs = False - self.only_superobs = False - self.colocation_layer_limts = None - self.profile_layer_limits = None - - self.read_opts_ungridded = {} - # attributes for reading colocated data files made outside of pyaerocom - self.only_json = False - self.coldata_dir = None - - self.update(**kwargs) - self.check_cfg() - self.check_add_obs() - - def check_add_obs(self): - """Check if this dataset is an auxiliary post dataset""" - if len(self.obs_aux_requires) > 0: - if not self.obs_type == "ungridded": - raise NotImplementedError( - f"Cannot initialise auxiliary setup for {self.obs_id}. " - f"Aux obs reading is so far only possible for ungridded observations." - ) - if self.obs_id not in const.OBS_IDS_UNGRIDDED: - try: - const.add_ungridded_post_dataset(**self) - except Exception: - raise InitialisationError( - f"Cannot initialise auxiliary reading setup for {self.obs_id}. " - f"Reason:\n{format_exc()}" - ) - - def get_all_vars(self) -> list: - """ - Get list of all variables associated with this entry - - Returns - ------- - list - DESCRIPTION. - - """ - return self.obs_vars - - def has_var(self, var_name): - """ - Check if input variable is defined in entry - - Returns - ------- - bool - True if entry has variable available, else False - """ - return True if var_name in self.get_all_vars() else False - - def get_vert_code(self, var): - """Get vertical code name for obs / var combination""" - vc = self["obs_vert_type"] - if isinstance(vc, str): - val = vc - elif isinstance(vc, dict) and var in vc: - val = vc[var] - else: - raise ValueError(f"invalid value for obs_vert_type: {vc}") - if val not in self.SUPPORTED_VERT_CODES: - raise ValueError( - f"invalid value for obs_vert_type: {val}. Choose from " - f"{self.SUPPORTED_VERT_CODES}." - ) - return val - - def check_cfg(self): - """Check that minimum required attributes are set and okay""" - - if not self.is_superobs and not isinstance(self.obs_id, str | dict): - raise ValueError( - f"Invalid value for obs_id: {self.obs_id}. Need str or dict " - f"or specification of ids and variables via obs_compute_post" - ) - if isinstance(self.obs_vars, str): - self.obs_vars = [self.obs_vars] - elif not isinstance(self.obs_vars, list): - raise ValueError(f"Invalid input for obs_vars. Need list or str, got: {self.obs_vars}") - ovt = self.obs_vert_type - if ovt is None: - raise ValueError( - f"obs_vert_type is not defined. Please specify " - f"using either of the available codes: {self.SUPPORTED_VERT_CODES}. " - f"It may be specified for all variables (as string) " - f"or per variable using a dict" - ) - elif isinstance(ovt, str) and ovt not in self.SUPPORTED_VERT_CODES: - self.obs_vert_type = self._check_ovt(ovt) - elif isinstance(self.obs_vert_type, dict): - for var_name, val in self.obs_vert_type.items(): - if val not in self.SUPPORTED_VERT_CODES: - raise ValueError( - f"Invalid value for obs_vert_type: {self.obs_vert_type} " - f"(variable {var_name}). Supported codes are {self.SUPPORTED_VERT_CODES}." - ) - ovl = self.instr_vert_loc - if isinstance(ovl, str) and ovl not in self.SUPPORTED_VERT_LOCS: - raise AttributeError( - f"Invalid value for instr_vert_loc: {ovl} for {self.obs_id}. " - f"Please choose from: {self.SUPPORTED_VERT_LOCS}" - ) - - def _check_ovt(self, ovt): - """Check if obs_vert_type string is valid alias - - Parameters - ---------- - ovt : str - obs_vert_type string - - Returns - ------- - str - valid obs_vert_type - - Raises - ------ - ValueError - if `ovt` is invalid - """ - if ovt in self.ALT_NAMES_VERT_CODES: - _ovt = self.ALT_NAMES_VERT_CODES[ovt] - logger.warning(f"Please use {_ovt} for obs_vert_code and not {ovt}") - return _ovt - valid = self.SUPPORTED_VERT_CODES + list(self.ALT_NAMES_VERT_CODES) - raise ValueError( - f"Invalid value for obs_vert_type: {self.obs_vert_type}. " - f"Supported codes are {valid}." - ) - - class ObsEntry(BaseModel): """Observation configuration for evaluation (BaseMode) @@ -339,13 +135,6 @@ def validate_instr_vert_loc(cls, v): f"Please choose from: {cls.SUPPORTED_VERT_LOCS}" ) - @field_validator("obs_aux_requires") - @classmethod - def validate_obs_aux_requires(cls, v): - if len(cls.obs_aux_requires) > 0: - if "obs_type" not in cls.model_fields: - raise ValueError("obs_type is required if obs_aux_requires is given") - @model_validator(mode="after") def check_cfg(self): if not self.is_superobs and not isinstance(self.obs_id, str | tuple | dict): From fc2bf8d6eaf058e121ef873afcf7d349f799a903 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 17:42:13 +0200 Subject: [PATCH 11/21] clean up and validate assignment --- pyaerocom/aeroval/_processing_base.py | 4 +-- pyaerocom/aeroval/obsentry.py | 19 +++++++------ tests/aeroval/test_setup_classes.py | 40 --------------------------- 3 files changed, 13 insertions(+), 50 deletions(-) diff --git a/pyaerocom/aeroval/_processing_base.py b/pyaerocom/aeroval/_processing_base.py index 8a1634c1f..e9f0829ad 100644 --- a/pyaerocom/aeroval/_processing_base.py +++ b/pyaerocom/aeroval/_processing_base.py @@ -122,7 +122,7 @@ def get_colocator(self, model_name: str = None, obs_name: str = None) -> Colocat mod_cfg = self.cfg.get_model_entry(model_name) col_cfg["model_cfg"] = mod_cfg - # LB: Hack and at what lowlevel_helpers's import_from was doing + # Hack and at what lowlevel_helpers's import_from was doing for key, val in mod_cfg.items(): if key in ColocationSetup.model_fields: col_cfg[key] = val @@ -131,7 +131,7 @@ def get_colocator(self, model_name: str = None, obs_name: str = None) -> Colocat pyaro_config = obs_cfg["obs_config"] if "obs_config" in obs_cfg else None col_cfg["obs_config"] = pyaro_config - # LB: Hack and at what lowlevel_helpers's import_from was doing + # Hack and at what lowlevel_helpers's import_from was doing for key, val in obs_cfg.items(): if key in ColocationSetup.model_fields: col_cfg[key] = val diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index af62a98db..d385ca436 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -18,7 +18,7 @@ class ObsEntry(BaseModel): - """Observation configuration for evaluation (BaseMode) + """Observation configuration for evaluation (BaseModel) Note ---- @@ -70,7 +70,12 @@ class ObsEntry(BaseModel): """ ## Pydantic ConfigDict - model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow", protected_namespaces=()) + model_config = ConfigDict( + arbitrary_types_allowed=True, + extra="allow", + protected_namespaces=(), + validate_assignment=True, + ) SUPPORTED_VERT_CODES: tuple[ str, @@ -80,7 +85,7 @@ class ObsEntry(BaseModel): "Column", "Profile", "Surface", - ) # Lb: may be redundant with obs_vert_type below + ) ALT_NAMES_VERT_CODES: dict = dict(ModelLevel="Profile") SUPPORTED_VERT_LOCS: tuple[str, str, str] = ( @@ -88,7 +93,7 @@ class ObsEntry(BaseModel): "space", "airborne", ) - # LB: Originally DataSource.SUPPORTED_VERT_LOCS, but that is achild class of BrowseDict so hardcode here for now + # Originally DataSource.SUPPORTED_VERT_LOCS, but that is achild class of BrowseDict so hardcode here for now ###################### ## Required attributes @@ -113,7 +118,7 @@ class ObsEntry(BaseModel): # attributes for reading colocated data files made outside of pyaerocom only_json: bool = False coldata_dir: str | Path | None = ( - None # Lb: Would like this to be a Path but need to see if it will cause issues down the line + None # Would like this to be a Path but need to see if it will cause issues down the line ) ############# @@ -152,9 +157,7 @@ def check_cfg(self): def check_add_obs(self): """Check if this dataset is an auxiliary post dataset""" if len(self.obs_aux_requires) > 0: - if ( - not self.obs_type == "ungridded" - ): # LB: Not sure where self.obs_type is coming from. May need to think about this + if not self.obs_type == "ungridded": raise NotImplementedError( f"Cannot initialise auxiliary setup for {self.obs_id}. " f"Aux obs reading is so far only possible for ungridded observations." diff --git a/tests/aeroval/test_setup_classes.py b/tests/aeroval/test_setup_classes.py index ad3668170..d9ad77bda 100644 --- a/tests/aeroval/test_setup_classes.py +++ b/tests/aeroval/test_setup_classes.py @@ -31,46 +31,6 @@ def test_EvalSetup(cfg_exp1: dict): assert EvalSetup(**cfg_exp1) == EvalSetup.model_validate(cfg_exp1) -# @pytest.mark.parametrize( -# "update,error", -# [ -# pytest.param( -# dict(model_cfg=dict(WRONG_MODEL=MODELS["TM5-AP3-CTRL"])), -# "Invalid name: WRONG_MODEL", -# id="model_cfg", -# ), -# pytest.param( -# dict(obs_cfg=dict(WRONG_OBS=OBS_GROUNDBASED["AERONET-Sun"])), -# "Invalid name: WRONG_OBS", -# id="obs_cfg", -# ), -# pytest.param( -# dict( -# obs_cfg=dict( -# # obs_vars=("concpm10",), -# OBS=dict( -# obs_vars=("concpm10",), -# web_interface_name="WRONG_OBS", -# ), -# ) -# ), -# "Invalid name: WRONG_OBS", -# id="web_interface_name", -# ), -# ], -# ) -# def test_EvalSetup_INVALID_ENTRY_NAMES(cfg_exp1: dict, error: str): -# # with pytest.raises(EvalEntryNameError) as e: -# stp = EvalSetup(**cfg_exp1) -# assert stp -# # assert error in str(e.value) - - -# with pytest.raises(EvalEntryNameError) as e: -# EvalSetup.model_validate(cfg_exp1) -# assert error in str(e.value) - - @pytest.mark.parametrize( "update", ( From 841050d0726da13ce63838791a19e6f4ce692026 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Fri, 4 Oct 2024 17:47:27 +0200 Subject: [PATCH 12/21] dead code --- pyaerocom/aeroval/collections.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 35e755523..d074d82ac 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -22,8 +22,6 @@ def _check_entry_name(self, key): def __setitem__(self, key, value): self._check_entry_name(key) - # if "web_interface_name" in value: - # self._check_entry_name(value["web_interface_name"]) super().__setitem__(key, value) def keylist(self, name_or_pattern: str = None) -> list: From 76941d740b04dfb3738115a6b913207fdd5b7df7 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 6 Oct 2024 18:07:55 +0200 Subject: [PATCH 13/21] make obs_id required --- pyaerocom/aeroval/obsentry.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index d385ca436..b0b932523 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -98,12 +98,12 @@ class ObsEntry(BaseModel): ###################### ## Required attributes ###################### - obs_vars: tuple[str, ...] | str - obs_vert_type: str + obs_vars: str | tuple[str, ...] + obs_id: str | tuple[str, ...] ###################### ## Optional attributes ###################### - obs_id: str | tuple[str, ...] = "" + # obs_id: str | tuple[str, ...] = "" obs_ts_type_read: str | dict | None = None obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" obs_aux_requires: dict[str, dict] = {} From aee0fdbd75180f051e859fbd8b233272507e1f5a Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 6 Oct 2024 18:55:25 +0200 Subject: [PATCH 14/21] implement obs_ver_type validator --- pyaerocom/aeroval/obsentry.py | 78 ++++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index b0b932523..512cdc2b3 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -17,6 +17,26 @@ logger = logging.getLogger(__name__) +SUPPORTED_VERT_CODES: tuple[ + str, + str, + str, +] = ( + "Column", + "Profile", + "Surface", +) + +ALT_NAMES_VERT_CODES: dict = dict(ModelLevel="Profile") + + +SUPPORTED_VERT_LOCS: tuple[str, str, str] = ( + "ground", + "space", + "airborne", +) + + class ObsEntry(BaseModel): """Observation configuration for evaluation (BaseModel) @@ -77,24 +97,6 @@ class ObsEntry(BaseModel): validate_assignment=True, ) - SUPPORTED_VERT_CODES: tuple[ - str, - str, - str, - ] = ( - "Column", - "Profile", - "Surface", - ) - ALT_NAMES_VERT_CODES: dict = dict(ModelLevel="Profile") - - SUPPORTED_VERT_LOCS: tuple[str, str, str] = ( - "ground", - "space", - "airborne", - ) - # Originally DataSource.SUPPORTED_VERT_LOCS, but that is achild class of BrowseDict so hardcode here for now - ###################### ## Required attributes ###################### @@ -134,11 +136,41 @@ def validate_obs_vars(cls, v): @field_validator("instr_vert_loc") @classmethod def validate_instr_vert_loc(cls, v): - if isinstance(v, str) and v not in cls.SUPPORTED_VERT_LOCS: + if isinstance(v, str) and v not in SUPPORTED_VERT_LOCS: raise AttributeError( f"Invalid value for instr_vert_loc: {v} for {cls.obs_id}. " - f"Please choose from: {cls.SUPPORTED_VERT_LOCS}" + f"Please choose from: {SUPPORTED_VERT_LOCS}" + ) + + @field_validator("obs_vert_type") + @classmethod + def check_obs_vert_type(cls, ovt): + """Check if obs_vert_type string is valid alias + Parameters + ---------- + ovt : str + obs_vert_type string + Returns + ------- + str + valid obs_vert_type + Raises + ------ + ValueError + if `ovt` is invalid + """ + if ovt in SUPPORTED_VERT_CODES: + return ovt + if ovt in ALT_NAMES_VERT_CODES: + logger.warning( + f"Please use {ALT_NAMES_VERT_CODES[ovt]} for obs_vert_code and not {ovt}" ) + ovt = ALT_NAMES_VERT_CODES[ovt] + return ovt + valid = SUPPORTED_VERT_CODES + list(ALT_NAMES_VERT_CODES) + raise ValueError( + f"Invalid value for obs_vert_type: {ovt}. " f"Supported codes are {valid}." + ) @model_validator(mode="after") def check_cfg(self): @@ -148,6 +180,7 @@ def check_cfg(self): f"or specification of ids and variables via obs_compute_post" ) self.check_add_obs() + # self._check_ovt(self.obs_vert_type) return self ########## @@ -201,9 +234,8 @@ def get_vert_code(self, var): val = vc[var] else: raise ValueError(f"invalid value for obs_vert_type: {vc}") - if val not in self.SUPPORTED_VERT_CODES: + if val not in SUPPORTED_VERT_CODES: raise ValueError( - f"invalid value for obs_vert_type: {val}. Choose from " - f"{self.SUPPORTED_VERT_CODES}." + f"invalid value for obs_vert_type: {val}. Choose from " f"{SUPPORTED_VERT_CODES}." ) return val From 88a7c45b6e35e3d3040b8f826a2c583b002d8524 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 6 Oct 2024 19:29:32 +0200 Subject: [PATCH 15/21] get_obs_entry returns ObsEntry instance, not a dict --- pyaerocom/aeroval/_processing_base.py | 4 ++-- pyaerocom/aeroval/experiment_processor.py | 12 ++++++------ pyaerocom/aeroval/obsentry.py | 2 +- pyaerocom/aeroval/setup_classes.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pyaerocom/aeroval/_processing_base.py b/pyaerocom/aeroval/_processing_base.py index e9f0829ad..480e19d61 100644 --- a/pyaerocom/aeroval/_processing_base.py +++ b/pyaerocom/aeroval/_processing_base.py @@ -92,7 +92,7 @@ def _get_diurnal_only(self, obs_name): ------- diurnal_only : bool """ - return self.cfg.get_obs_entry(obs_name).get("diurnal_only", False) + return self.cfg.get_obs_entry(obs_name).diurnal_only def get_colocator(self, model_name: str = None, obs_name: str = None) -> Colocator: """ @@ -132,7 +132,7 @@ def get_colocator(self, model_name: str = None, obs_name: str = None) -> Colocat col_cfg["obs_config"] = pyaro_config # Hack and at what lowlevel_helpers's import_from was doing - for key, val in obs_cfg.items(): + for key, val in obs_cfg.model_dump().items(): if key in ColocationSetup.model_fields: col_cfg[key] = val diff --git a/pyaerocom/aeroval/experiment_processor.py b/pyaerocom/aeroval/experiment_processor.py index 4f292135e..046846af9 100644 --- a/pyaerocom/aeroval/experiment_processor.py +++ b/pyaerocom/aeroval/experiment_processor.py @@ -34,7 +34,7 @@ def _run_single_entry(self, model_name, obs_name, var_list): logger.info(msg) return ocfg = self.cfg.get_obs_entry(obs_name) - if ocfg["is_superobs"]: + if ocfg.is_superobs: try: engine = SuperObsEngine(self.cfg) engine.run( @@ -47,19 +47,19 @@ def _run_single_entry(self, model_name, obs_name, var_list): if self.raise_exceptions: raise logger.warning("failed to process superobs...") - elif ocfg["only_superobs"]: + elif ocfg.only_superobs: logger.info( f"Skipping json processing of {obs_name}, as this is " f"marked to be used only as part of a superobs " f"network" ) - elif ocfg["only_json"]: - if not ocfg["coldata_dir"]: + elif ocfg.only_json: + if not ocfg.coldata_dir: raise Exception( "No coldata_dir provided for an obs network for which only_json=True. The assumption of setting only_json=True is that colocated files already exist, and so a directory for these files must be provided." ) else: - preprocessed_coldata_dir = ocfg["coldata_dir"] + preprocessed_coldata_dir = ocfg.coldata_dir mask = f"{preprocessed_coldata_dir}/{model_name}/*.nc" files_to_convert = glob.glob(mask) engine = ColdataToJsonEngine(self.cfg) @@ -69,7 +69,7 @@ def _run_single_entry(self, model_name, obs_name, var_list): # If a var_list is given, only run on the obs networks which contain that variable if var_list: var_list_asked = var_list - obs_vars = ocfg["obs_vars"] + obs_vars = ocfg.obs_vars var_list = list(set(obs_vars) & set(var_list)) if not var_list: logger.warning( diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 512cdc2b3..76f63367a 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -105,7 +105,6 @@ class ObsEntry(BaseModel): ###################### ## Optional attributes ###################### - # obs_id: str | tuple[str, ...] = "" obs_ts_type_read: str | dict | None = None obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" obs_aux_requires: dict[str, dict] = {} @@ -115,6 +114,7 @@ class ObsEntry(BaseModel): colocation_layer_limts: tuple[LayerLimits, ...] | None = None profile_layer_limits: tuple[LayerLimits, ...] | None = None web_interface_name: str | None = None + diurnal_only: bool = False read_opts_ungridded: dict = {} # attributes for reading colocated data files made outside of pyaerocom diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index 6fda955e1..61059852b 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -529,7 +529,7 @@ def serialize_model_cfg(self, model_cfg: ModelCollection): ########################### def get_obs_entry(self, obs_name) -> dict: - return self.obs_cfg.get_entry(obs_name).model_dump() + return self.obs_cfg.get_entry(obs_name) def get_model_entry(self, model_name) -> dict: """Get model entry configuration From 519df9f31313494880d640fc3fa3b2ecfa619ac8 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Sun, 6 Oct 2024 19:34:36 +0200 Subject: [PATCH 16/21] rm _get_diurnal_only and associated tests --- pyaerocom/aeroval/_processing_base.py | 20 +------------------- tests/aeroval/test__processing_base.py | 12 ++++++------ 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/pyaerocom/aeroval/_processing_base.py b/pyaerocom/aeroval/_processing_base.py index 480e19d61..5a633ee1e 100644 --- a/pyaerocom/aeroval/_processing_base.py +++ b/pyaerocom/aeroval/_processing_base.py @@ -76,24 +76,6 @@ class HasColocator(HasConfig): Config class that also has the ability to co-locate """ - def _get_diurnal_only(self, obs_name): - """ - Check if colocated data is flagged for only diurnal processing - - Parameters - ---------- - obs_name : string - Name of observational subset - colocated_data : ColocatedData - A ColocatedData object that will be checked for suitability of - diurnal processing. - - Returns - ------- - diurnal_only : bool - """ - return self.cfg.get_obs_entry(obs_name).diurnal_only - def get_colocator(self, model_name: str = None, obs_name: str = None) -> Colocator: """ Instantiate colocation engine @@ -136,7 +118,7 @@ def get_colocator(self, model_name: str = None, obs_name: str = None) -> Colocat if key in ColocationSetup.model_fields: col_cfg[key] = val - col_cfg["add_meta"].update(diurnal_only=self._get_diurnal_only(obs_name)) + col_cfg["add_meta"].update(diurnal_only=self.cfg.get_obs_entry(obs_name).diurnal_only) col_stp = ColocationSetup(**col_cfg) col = Colocator(col_stp) diff --git a/tests/aeroval/test__processing_base.py b/tests/aeroval/test__processing_base.py index e67ede8de..111611dc7 100644 --- a/tests/aeroval/test__processing_base.py +++ b/tests/aeroval/test__processing_base.py @@ -14,7 +14,12 @@ def setup() -> EvalSetup: """EvalSetup instance""" obs_cfg = dict( obs1=dict(obs_id="obs1", obs_vars=["od550aer"], obs_vert_type="Column"), - obs2=dict(obs_id="obs2", obs_vars=["od550aer"], obs_vert_type="Column", diurnal_only=True), + obs2=dict( + obs_id="obs2", + obs_vars=["od550aer"], + obs_vert_type="Column", + diurnal_only=True, + ), ) return EvalSetup(proj_id="bla", exp_id="blub", obs_cfg=obs_cfg) @@ -44,11 +49,6 @@ def collocator(setup: EvalSetup) -> HasColocator: return HasColocator(setup) -def test_HasColocator_get_diurnal_only(collocator: HasColocator): - assert not collocator._get_diurnal_only("obs1") - assert collocator._get_diurnal_only("obs2") - - @pytest.mark.parametrize("obs_name", [None, "obs1", "obs2"]) def test_HasColocator_get_colocator(collocator: HasColocator, obs_name: str | None): col = collocator.get_colocator(obs_name=obs_name) From ad024620ad022e076a2999c1e31bee40f97409a7 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Mon, 7 Oct 2024 09:13:11 +0200 Subject: [PATCH 17/21] clean up doc strings --- pyaerocom/aeroval/obsentry.py | 2 +- pyaerocom/aeroval/setup_classes.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 76f63367a..cc43986eb 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -42,7 +42,7 @@ class ObsEntry(BaseModel): Note ---- - Only :attr:`obs_id` and `obs_vars` are mandatory, the rest is optional. + Only :attr:`obs_id` and `obs_vars` are mandatory, the rest are optional. Attributes ---------- diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index 61059852b..aaddbc11a 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -528,7 +528,8 @@ def serialize_model_cfg(self, model_cfg: ModelCollection): ## Methods ########################### - def get_obs_entry(self, obs_name) -> dict: + def get_obs_entry(self, obs_name): + """Returns ObsEntry instance for network obs_name""" return self.obs_cfg.get_entry(obs_name) def get_model_entry(self, model_name) -> dict: From 895e8333b40aaa4bf2a937c24f9a07b4939e4365 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Mon, 7 Oct 2024 14:26:48 +0200 Subject: [PATCH 18/21] web_interface_names --- pyaerocom/aeroval/collections.py | 6 +++--- pyaerocom/aeroval/experiment_output.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index d074d82ac..482ade93c 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -68,7 +68,7 @@ def get_entry(self, key) -> object: @property @abc.abstractmethod - def web_iface_names(self) -> list: + def web_interface_names(self) -> list: """ List of webinterface names for """ @@ -149,7 +149,7 @@ def get_web_interface_name(self, key): return self[key].web_interface_name if self[key].web_interface_name is not None else key @property - def web_iface_names(self) -> list: + def web_interface_names(self) -> list: """ List of web interface names for each obs entry @@ -219,7 +219,7 @@ def get_entry(self, key) -> ModelEntry: raise EntryNotAvailable(f"no such entry {key}") @property - def web_iface_names(self) -> list: + def web_interface_names(self) -> list: """ List of web interface names for each obs entry diff --git a/pyaerocom/aeroval/experiment_output.py b/pyaerocom/aeroval/experiment_output.py index c6aa1fbcf..0e827b495 100644 --- a/pyaerocom/aeroval/experiment_output.py +++ b/pyaerocom/aeroval/experiment_output.py @@ -425,7 +425,7 @@ def _check_clean_ts_file(self, fp) -> bool: return True models_avail = list(data) - models_in_exp = self.cfg.model_cfg.web_iface_names + models_in_exp = self.cfg.model_cfg.web_interfaceface_names if all([mod in models_in_exp for mod in models_avail]): # nothing to clean up return False @@ -512,7 +512,7 @@ def get_model_order_menu(self) -> list: ) order.extend(self.cfg.webdisp_opts.model_order_menu) elif self.cfg.webdisp_opts.obsorder_from_config: - order.extend(self.cfg.model_cfg.web_iface_names) + order.extend(self.cfg.model_cfg.web_interface_names) return order def get_obs_order_menu(self) -> list: @@ -526,7 +526,7 @@ def get_obs_order_menu(self) -> list: ) order.extend(self.cfg.webdisp_opts.obs_order_menu) elif self.cfg.webdisp_opts.obsorder_from_config: - order.extend(self.cfg.obs_cfg.web_iface_names) + order.extend(self.cfg.obs_cfg.web_interface_names) return order def _get_json_output_files(self, dirname) -> list[str]: From aff6d5eef123762d7db7b7d1e5ebd1555c1940ec Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Mon, 7 Oct 2024 15:32:20 +0200 Subject: [PATCH 19/21] feedback --- pyaerocom/aeroval/collections.py | 3 ++- pyaerocom/aeroval/experiment_output.py | 2 +- pyaerocom/aeroval/obsentry.py | 3 ++- pyaerocom/aeroval/setup_classes.py | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pyaerocom/aeroval/collections.py b/pyaerocom/aeroval/collections.py index 482ade93c..96d442b28 100644 --- a/pyaerocom/aeroval/collections.py +++ b/pyaerocom/aeroval/collections.py @@ -11,8 +11,9 @@ class BaseCollection(BrowseDict, abc.ABC): #: maximum length of entry names MAXLEN_KEYS = 25 #: Invalid chars in entry names - FORBIDDEN_CHARS_KEYS = [] # "_" + FORBIDDEN_CHARS_KEYS = [] + # TODO: Wait a few release cycles after v0.23.0 and see if this can be removed def _check_entry_name(self, key): if any([x in key for x in self.FORBIDDEN_CHARS_KEYS]): raise EvalEntryNameError( diff --git a/pyaerocom/aeroval/experiment_output.py b/pyaerocom/aeroval/experiment_output.py index 0e827b495..1dd43f9a6 100644 --- a/pyaerocom/aeroval/experiment_output.py +++ b/pyaerocom/aeroval/experiment_output.py @@ -425,7 +425,7 @@ def _check_clean_ts_file(self, fp) -> bool: return True models_avail = list(data) - models_in_exp = self.cfg.model_cfg.web_interfaceface_names + models_in_exp = self.cfg.model_cfg.web_interface_names if all([mod in models_in_exp for mod in models_avail]): # nothing to clean up return False diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index cc43986eb..c879042f3 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -106,7 +106,7 @@ class ObsEntry(BaseModel): ## Optional attributes ###################### obs_ts_type_read: str | dict | None = None - obs_vert_type: Literal["Column", "Profile", "Surface"] = "Surface" + obs_vert_type: Literal["Column", "Profile", "Surface", "ModelLevel"] = "Surface" obs_aux_requires: dict[str, dict] = {} instr_vert_loc: str | None = None is_superobs: bool = False @@ -115,6 +115,7 @@ class ObsEntry(BaseModel): profile_layer_limits: tuple[LayerLimits, ...] | None = None web_interface_name: str | None = None diurnal_only: bool = False + obs_type: str | None = None read_opts_ungridded: dict = {} # attributes for reading colocated data files made outside of pyaerocom diff --git a/pyaerocom/aeroval/setup_classes.py b/pyaerocom/aeroval/setup_classes.py index aaddbc11a..1677c1b3e 100644 --- a/pyaerocom/aeroval/setup_classes.py +++ b/pyaerocom/aeroval/setup_classes.py @@ -9,6 +9,7 @@ import datetime from pyaerocom.aeroval.glob_defaults import VarWebInfo, VarWebScaleAndColormap +from pyaerocom.aeroval.obsentry import ObsEntry if sys.version_info >= (3, 11): from typing import Self @@ -528,7 +529,7 @@ def serialize_model_cfg(self, model_cfg: ModelCollection): ## Methods ########################### - def get_obs_entry(self, obs_name): + def get_obs_entry(self, obs_name) -> ObsEntry: """Returns ObsEntry instance for network obs_name""" return self.obs_cfg.get_entry(obs_name) From 6256ff50445504eefd0e4e0ed984bebb59453195 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Mon, 7 Oct 2024 15:56:07 +0200 Subject: [PATCH 20/21] dead code --- pyaerocom/aeroval/obsentry.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index c879042f3..1ceb1a98d 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -121,7 +121,7 @@ class ObsEntry(BaseModel): # attributes for reading colocated data files made outside of pyaerocom only_json: bool = False coldata_dir: str | Path | None = ( - None # Would like this to be a Path but need to see if it will cause issues down the line + None # TODO: Would like this to be a Path but need to see if it will cause issues down the line ) ############# @@ -181,7 +181,6 @@ def check_cfg(self): f"or specification of ids and variables via obs_compute_post" ) self.check_add_obs() - # self._check_ovt(self.obs_vert_type) return self ########## @@ -207,7 +206,7 @@ def check_add_obs(self): def get_all_vars(self) -> tuple[str, ...]: """ - Get list of all variables associated with this entry + Get a tuple of all variables associated with this entry Returns ------- From 114ecddcb3643b945fb7cacb98215b49b3526d98 Mon Sep 17 00:00:00 2001 From: Lewis Blake Date: Mon, 7 Oct 2024 16:13:54 +0200 Subject: [PATCH 21/21] make ObsEntry immutable --- pyaerocom/aeroval/obsentry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 1ceb1a98d..7931f0466 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -93,7 +93,7 @@ class ObsEntry(BaseModel): model_config = ConfigDict( arbitrary_types_allowed=True, extra="allow", - protected_namespaces=(), + allow_mutation=False, validate_assignment=True, )