diff --git a/pyaerocom/aeroval/coldatatojson_engine.py b/pyaerocom/aeroval/coldatatojson_engine.py index 935b36372..39fff89b4 100644 --- a/pyaerocom/aeroval/coldatatojson_engine.py +++ b/pyaerocom/aeroval/coldatatojson_engine.py @@ -1,7 +1,10 @@ import logging import os +import shutil from time import time +from cf_units import Unit + from pyaerocom import ColocatedData, TsType from pyaerocom._lowlevel_helpers import write_json from pyaerocom.aeroval._processing_base import ProcessingEngine @@ -18,10 +21,14 @@ _process_statistics_timeseries, _write_site_data, _write_stationdata_json, + add_profile_entry_json, get_heatmap_filename, get_json_mapname, + get_profile_filename, get_timeseries_file_name, init_regions_web, + process_profile_data_for_regions, + process_profile_data_for_stations, update_regions_json, ) from pyaerocom.exceptions import AeroValConfigError, TemporalResolutionError @@ -46,6 +53,11 @@ def run(self, files): list of files that have been converted. """ + out_dirs = self.cfg.path_manager.get_json_output_dirs(True) + for idir in out_dirs: + if os.path.exists(out_dirs[idir]): + shutil.rmtree(out_dirs[idir]) + converted = [] for file in files: logger.info(f"Processing: {file}") @@ -93,7 +105,15 @@ def process_coldata(self, coldata: ColocatedData): stats_min_num = self.cfg.statistics_opts.MIN_NUM - vert_code = coldata.get_meta_item("vert_code") + if hasattr(coldata.data, "altitude_units"): + if Unit(coldata.data.attrs["altitude_units"]) != Unit( + "km" + ): # put everything in terms of km for viz + # convert start and end for file naming + self._convert_coldata_altitude_units_to_km(coldata) + + vert_code = self._get_vert_code(coldata) + diurnal_only = coldata.get_meta_item("diurnal_only") add_trends = self.cfg.statistics_opts.add_trends @@ -109,7 +129,7 @@ def process_coldata(self, coldata: ColocatedData): # this will need to be figured out as soon as there is altitude elif "altitude" in coldata.data.dims: - raise NotImplementedError("Cannot yet handle profile data") + raise ValueError("Altitude should have been dealt with already in the colocation") elif not isinstance(coldata, ColocatedData): raise ValueError(f"Need ColocatedData object, got {type(coldata)}") @@ -165,114 +185,234 @@ def process_coldata(self, coldata: ColocatedData): if annual_stats_constrained: data = _apply_annual_constraint(data) - if not diurnal_only: - logger.info("Processing statistics timeseries for all regions") - input_freq = self.cfg.statistics_opts.stats_tseries_base_freq - - for reg in regnames: - try: - stats_ts = _process_statistics_timeseries( - data=data, - freq=main_freq, - region_ids={reg: regnames[reg]}, - use_weights=use_weights, - use_country=use_country, - data_freq=input_freq, - ) - - except TemporalResolutionError: - stats_ts = {} - - fname = get_timeseries_file_name(regnames[reg], obs_name, var_name_web, vert_code) - ts_file = os.path.join(out_dirs["hm/ts"], fname) - _add_heatmap_entry_json( - ts_file, stats_ts, obs_name, var_name_web, vert_code, model_name, model_var + if coldata.data.attrs.get("just_for_viz", True): # make the regular json output + if not diurnal_only: + logger.info("Processing statistics timeseries for all regions") + + self._process_stats_timeseries_for_all_regions( + data=data, + coldata=coldata, + main_freq=main_freq, + regnames=regnames, + use_weights=use_weights, + use_country=use_country, + obs_name=obs_name, + obs_var=obs_var, + var_name_web=var_name_web, + out_dirs=out_dirs, + vert_code=vert_code, + model_name=model_name, + model_var=model_var, + 
meta_glob=meta_glob, + periods=periods, + seasons=seasons, + add_trends=add_trends, + trends_min_yrs=trends_min_yrs, + regions_how=regions_how, + regs=regs, + stats_min_num=stats_min_num, + use_fairmode=use_fairmode, ) - - logger.info("Processing heatmap data for all regions") - hm_all = _process_heatmap_data( - data, - regnames, - use_weights, - use_country, - meta_glob, - periods, - seasons, - add_trends, - trends_min_yrs, + else: + logger.info("Processing profile data for visualization") + + self._process_profile_data_for_visualization( + data=data, + use_country=use_country, + region_names=regnames, + station_names=coldata.data.station_name.values, + periods=periods, + seasons=seasons, + obs_name=obs_name, + var_name_web=var_name_web, + out_dirs=out_dirs, ) - for freq, hm_data in hm_all.items(): - fname = get_heatmap_filename(freq) + logger.info( + f"Finished computing json files for {model_name} ({model_var}) vs. " + f"{obs_name} ({obs_var})" + ) - hm_file = os.path.join(out_dirs["hm"], fname) + dt = time() - t00 + logger.info(f"Time expired: {dt:.2f} s") - _add_heatmap_entry_json( - hm_file, hm_data, obs_name, var_name_web, vert_code, model_name, model_var - ) + def _convert_coldata_altitude_units_to_km(self, coldata: ColocatedData = None): + alt_units = coldata.data.attrs["altitude_units"] - logger.info("Processing regional timeseries for all regions") - ts_objs_regional = _process_regional_timeseries(data, regnames, regions_how, meta_glob) - - _write_site_data(ts_objs_regional, out_dirs["ts"]) - if coldata.has_latlon_dims: - for cd in data.values(): - if cd is not None: - cd.data = cd.flatten_latlondim_station_name().data - - logger.info("Processing individual site timeseries data") - (ts_objs, map_meta, site_indices) = _process_sites(data, regs, regions_how, meta_glob) - - _write_site_data(ts_objs, out_dirs["ts"]) - - scatter_freq = min(TsType(fq) for fq in self.cfg.time_cfg.freqs) - scatter_freq = min(scatter_freq, main_freq) - - logger.info("Processing map and scat data by period") - for period in periods: - # compute map_data and scat_data just for this period - map_data, scat_data = _process_map_and_scat( - data, - map_meta, - site_indices, - [period], - str(scatter_freq), - stats_min_num, - seasons, - add_trends, - trends_min_yrs, - use_fairmode, - obs_var, - ) + coldata.data.attrs["vertical_layer"]["start"] = str( + Unit(alt_units).convert(coldata.data.attrs["vertical_layer"]["start"], other="km") + ) - # the files in /map and /scat will be split up according to their time period as well - map_name = get_json_mapname( - obs_name, var_name_web, model_name, model_var, vert_code, period - ) - outfile_map = os.path.join(out_dirs["map"], map_name) - write_json(map_data, outfile_map, ignore_nan=True) + coldata.data.attrs["vertical_layer"]["end"] = str( + Unit(alt_units).convert(coldata.data.attrs["vertical_layer"]["end"], other="km") + ) - outfile_scat = os.path.join(out_dirs["scat"], map_name) - write_json(scat_data, outfile_scat, ignore_nan=True) + def _get_vert_code(self, coldata: ColocatedData = None): + if hasattr(coldata.data, "vertical_layer"): + # start and end for vertical layers (display on web and name jsons) + start = float(coldata.data.attrs["vertical_layer"]["start"]) + end = float(coldata.data.attrs["vertical_layer"]["end"]) + # format correctly (e.g., 1, 1.5, 2, 2.5, etc.)
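[reviewer note] A quick sketch of what `_get_vert_code` produces for a layered ColocatedData object, since the `:g` format below is load-bearing for file naming (the attrs values here are invented):

```python
attrs = {"vertical_layer": {"start": "0.5", "end": "2.0"}}  # hypothetical attrs

start = f"{round(float(attrs['vertical_layer']['start']), 1):g}"  # -> '0.5'
end = f"{round(float(attrs['vertical_layer']['end']), 1):g}"      # -> '2' (trailing .0 dropped)
vert_code = f"{start}-{end}km"                                    # -> '0.5-2km'
```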
+ start = f"{round(float(start), 1):g}" + end = f"{round(float(end), 1):g}" + vert_code = f"{start}-{end}km" + else: + vert_code = coldata.get_meta_item("vert_code") + return vert_code + + def _process_profile_data_for_vizualization( + self, + data: dict[str, ColocatedData] = None, + use_country: bool = False, + region_names=None, + station_names=None, + periods: list[str] = None, + seasons: list[str] = None, + obs_name: str = None, + var_name_web: str = None, + out_dirs: dict = None, + ): + assert ( + region_names != None and station_names != None + ), f"Both region_id and station_name can not both be None" + + # Loop through regions + for regid in region_names: + profile_viz = process_profile_data_for_regions( + data=data, + region_id=regid, + use_country=use_country, + periods=periods, + seasons=seasons, + ) - if coldata.ts_type == "hourly" and use_diurnal: - logger.info("Processing diurnal profiles") - (ts_objs_weekly, ts_objs_weekly_reg) = _process_sites_weekly_ts( - coldata, regions_how, regnames, meta_glob + fname = get_profile_filename(region_names[regid], obs_name, var_name_web) + + outfile_profile = os.path.join(out_dirs["profiles"], fname) + add_profile_entry_json(outfile_profile, data, profile_viz, periods, seasons) + # Loop through stations + for station_name in station_names: + profile_viz = process_profile_data_for_stations( + data=data, + station_name=station_name, + use_country=use_country, + periods=periods, + seasons=seasons, ) - outdir = os.path.join(out_dirs["ts/diurnal"]) - for ts_data_weekly in ts_objs_weekly: - # writes json file - _write_stationdata_json(ts_data_weekly, outdir) - if ts_objs_weekly_reg != None: - for ts_data_weekly_reg in ts_objs_weekly_reg: - # writes json file - _write_stationdata_json(ts_data_weekly_reg, outdir) - logger.info( - f"Finished computing json files for {model_name} ({model_var}) vs. 
" - f"{obs_name} ({obs_var})" + fname = get_profile_filename(station_name, obs_name, var_name_web) + + outfile_profile = os.path.join(out_dirs["profiles"], fname) + add_profile_entry_json(outfile_profile, data, profile_viz, periods, seasons) + + def _process_stats_timeseries_for_all_regions( + self, + data: dict[str, ColocatedData] = None, + coldata: ColocatedData = None, + main_freq: str = None, + regnames=None, + use_weights: bool = True, + use_country: bool = False, + obs_name: str = None, + obs_var: str = None, + var_name_web: str = None, + out_dirs: dict = None, + vert_code: str = None, + model_name: str = None, + model_var: str = None, + meta_glob: dict = None, + periods: list[str] = None, + seasons: list[str] = None, + add_trends: bool = False, + trends_min_yrs: int = 7, + regions_how: str = "default", + regs: dict = None, + stats_min_num: int = 1, + use_fairmode: bool = False, + ): + input_freq = self.cfg.statistics_opts.stats_tseries_base_freq + for reg in regnames: + try: + stats_ts = _process_statistics_timeseries( + data=data, + freq=main_freq, + region_ids={reg: regnames[reg]}, + use_weights=use_weights, + use_country=use_country, + data_freq=input_freq, + ) + + except TemporalResolutionError: + stats_ts = {} + fname = get_timeseries_file_name(regnames[reg], obs_name, var_name_web, vert_code) + ts_file = os.path.join(out_dirs["hm/ts"], fname) + _add_heatmap_entry_json( + ts_file, stats_ts, obs_name, var_name_web, vert_code, model_name, model_var + ) + + logger.info("Processing heatmap data for all regions") + + hm_all = _process_heatmap_data( + data, + regnames, + use_weights, + use_country, + meta_glob, + periods, + seasons, + add_trends, + trends_min_yrs, ) - dt = time() - t00 - logger.info(f"Time expired (TOTAL): {dt:.2f} s") + for freq, hm_data in hm_all.items(): + fname = get_heatmap_filename(freq) + + hm_file = os.path.join(out_dirs["hm"], fname) + + _add_heatmap_entry_json( + hm_file, hm_data, obs_name, var_name_web, vert_code, model_name, model_var + ) + + logger.info("Processing regional timeseries for all regions") + ts_objs_regional = _process_regional_timeseries(data, regnames, regions_how, meta_glob) + + _write_site_data(ts_objs_regional, out_dirs["ts"]) + if coldata.has_latlon_dims: + for cd in data.values(): + if cd is not None: + cd.data = cd.flatten_latlondim_station_name().data + + logger.info("Processing individual site timeseries data") + (ts_objs, map_meta, site_indices) = _process_sites(data, regs, regions_how, meta_glob) + + _write_site_data(ts_objs, out_dirs["ts"]) + + scatter_freq = min(TsType(fq) for fq in self.cfg.time_cfg.freqs) + scatter_freq = min(scatter_freq, main_freq) + + logger.info("Processing map and scat data by period") + + for period in periods: + # compute map_data and scat_data just for this period + map_data, scat_data = _process_map_and_scat( + data, + map_meta, + site_indices, + [period], + str(scatter_freq), + stats_min_num, + seasons, + add_trends, + trends_min_yrs, + use_fairmode, + obs_var, + ) + + # the files in /map and /scat will be split up according to their time period as well + map_name = get_json_mapname( + obs_name, var_name_web, model_name, model_var, vert_code, period + ) + outfile_map = os.path.join(out_dirs["map"], map_name) + write_json(map_data, outfile_map, ignore_nan=True) + + outfile_scat = os.path.join(out_dirs["scat"], map_name) + write_json(scat_data, outfile_scat, ignore_nan=True) diff --git a/pyaerocom/aeroval/coldatatojson_helpers.py b/pyaerocom/aeroval/coldatatojson_helpers.py index 981a9811a..42d634291 
100644 --- a/pyaerocom/aeroval/coldatatojson_helpers.py +++ b/pyaerocom/aeroval/coldatatojson_helpers.py @@ -1363,3 +1363,214 @@ def _init_data_default_frequencies(coldata, to_ts_types): def _start_stop_from_periods(periods): start, stop = _get_min_max_year_periods(periods) return start_stop(start, stop + 1) + + +def get_profile_filename(station_or_region_name, obs_name, var_name_web): + return f"{station_or_region_name}_{obs_name}_{var_name_web}.json" + + +def process_profile_data_for_regions( + data: dict[str, ColocatedData], + region_id: str, + use_country: bool, + periods: list[str], + seasons: list[str], +) -> dict: # pragma: no cover + """ + This method populates the json files in data/profiles which are used for visualization. + Analogous to _process_map_and_scat for profile data. + Each json file corresponds to a region, obs network, and variable. + Inside the json, it is broken up by model. + Each model has a key for "z" (the vertical dimension), "obs", and "mod" + Each "obs" and "mod" is broken up by period. + + + Args: + data (dict[str, ColocatedData]): ColocatedData objects for this layer, keyed by frequency + region_id (str): Spatial subset to compute the mean profiles over + use_country (boolean): Passed to filter_region(). + periods (list[str]): Year part of the temporal range to average over + seasons (list[str]): Seasonal part of the temporal range to average over + + Returns: + output (dict): Dictionary to write to json + """ + output = {"obs": {}, "mod": {}} + + for freq, coldata in data.items(): + if freq not in output["obs"]: + output["obs"][freq] = {} + if freq not in output["mod"]: + output["mod"][freq] = {} + + for per in periods: + for season in seasons: + use_dummy = coldata is None + perstr = f"{per}-{season}" + if use_dummy: + output["obs"][freq][perstr] = np.nan + output["mod"][freq][perstr] = np.nan + else: + try: + per_season_subset = _select_period_season_coldata(coldata, per, season) + + subset = per_season_subset.filter_region( + region_id=region_id, check_country_meta=use_country + ) + + output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :]) + output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :]) + + except (DataCoverageError, TemporalResolutionError) as e: + msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}" + logger.warning(msg) + + output["obs"][freq][perstr] = np.nan + output["mod"][freq][perstr] = np.nan + + return output + + +def process_profile_data_for_stations( + data: dict[str, ColocatedData], + station_name: str, + use_country: bool, + periods: list[str], + seasons: list[str], +) -> dict: # pragma: no cover + """ + This method populates the json files in data/profiles which are used for visualization. + Analogous to _process_map_and_scat for profile data. + Each json file corresponds to a station, obs network, and variable. + Inside the json, it is broken up by model. + Each model has a key for "z" (the vertical dimension), "obs", and "mod" + Each "obs" and "mod" is broken up by period. + + + Args: + data (dict[str, ColocatedData]): ColocatedData objects for this layer, keyed by frequency + station_name (str): Station to compute mean profiles over for period + use_country (boolean): Passed to filter_region(). 
+ periods (list[str]): Year part of the temporal range to average over + seasons (list[str]): Seasonal part of the temporal range to average over + + Returns: + output (dict): Dictionary to write to json + """ + output = {"obs": {}, "mod": {}} + + for freq, coldata in data.items(): + if freq not in output["obs"]: + output["obs"][freq] = {} + if freq not in output["mod"]: + output["mod"][freq] = {} + + for per in periods: + for season in seasons: + use_dummy = coldata is None + perstr = f"{per}-{season}" + if use_dummy: + output["obs"][freq][perstr] = np.nan + output["mod"][freq][perstr] = np.nan + else: + try: + per_season_subset = _select_period_season_coldata(coldata, per, season) + + subset = per_season_subset.data[ + :, + :, + per_season_subset.data.station_name.values + == station_name, # in this case a station + ] # Assumes ordering of station name matches + + output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :]) + output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :]) + + except (DataCoverageError, TemporalResolutionError) as e: + msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}" + logger.warning(msg) + + output["obs"][freq][perstr] = np.nan + output["mod"][freq][perstr] = np.nan + + return output + + +def add_profile_entry_json( + profile_file: str, + data: dict[str, ColocatedData], + profile_viz: dict, + periods: list[str], + seasons: list[str], +): # pragma: no cover + """ + Analogous to _add_heatmap_entry_json for profile data. + Every time this function is called it checks whether the profile_file exists. + If so, it reads it; if not, it creates a new one. + This is because JSON files cannot be appended to, so every time we want to add entries for profile layers + we must read in the old file, add the entries, and write a new file. + + Args: + profile_file (str): Name of profile_file + data (dict[str, ColocatedData]): ColocatedData objects for this vertical layer, keyed by frequency + profile_viz (dict): Output of process_profile_data() + periods (list[str]): periods to compute over (years) + seasons (list[str]): seasons to compute over (e.g., All, DJF, etc.) 
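[reviewer note] To make the docstring above concrete, the accumulated JSON looks roughly like this after a few layers have been added (model name and values are invented; the authoritative keys are in the code below):

```python
{
    "TM5-AP3-CTRL": {                      # hypothetical model name
        "z": [0.25, 0.75, 1.25],           # layer midpoints, appended in increasing order
        "obs": {"monthly": {"2019-All": [0.12, 0.10, 0.07]}},  # one value per layer
        "mod": {"monthly": {"2019-All": [0.11, 0.09, 0.08]}},
        "metadata": {
            "z_unit": "km",
            "z_description": "Altitude ASL",
            "z_long_description": "Altitude Above Sea Level",
            "unit": "1/km",
        },
    }
}
```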
+ """ + if os.path.exists(profile_file): + current = read_json(profile_file) + else: + current = {} + + for freq, coldata in data.items(): + model_name = coldata.model_name + if not model_name in current: + current[model_name] = {} + + midpoint = ( + float(coldata.data.attrs["vertical_layer"]["end"]) + + float(coldata.data.attrs["vertical_layer"]["start"]) + ) / 2 + if not "z" in current[model_name]: + current[model_name]["z"] = [midpoint] # initalize with midpoint + + if ( + midpoint > current[model_name]["z"][-1] + ): # only store incremental increases in the layers + current[model_name]["z"].append(midpoint) + + if not "obs" in current[model_name]: + current[model_name]["obs"] = {} + + if not freq in current[model_name]["obs"]: + current[model_name]["obs"][freq] = {} + + if not "mod" in current[model_name]: + current[model_name]["mod"] = {} + + if not freq in current[model_name]["mod"]: + current[model_name]["mod"][freq] = {} + + for per in periods: + for season in seasons: + perstr = f"{per}-{season}" + + if not perstr in current[model_name]["obs"][freq]: + current[model_name]["obs"][freq][perstr] = [] + if not perstr in current[model_name]["mod"][freq]: + current[model_name]["mod"][freq][perstr] = [] + + current[model_name]["obs"][freq][perstr].append(profile_viz["obs"][freq][perstr]) + current[model_name]["mod"][freq][perstr].append(profile_viz["mod"][freq][perstr]) + + if not "metadata" in current[model_name]: + current[model_name]["metadata"] = { + "z_unit": coldata.data.attrs["altitude_units"], + "z_description": "Altitude ASL", + "z_long_description": "Altitude Above Sea Level", + "unit": coldata.unitstr, + } + + write_json(current, profile_file, ignore_nan=True) diff --git a/pyaerocom/aeroval/fairmode_stats.py b/pyaerocom/aeroval/fairmode_stats.py index 564a0c343..3a4b120e2 100644 --- a/pyaerocom/aeroval/fairmode_stats.py +++ b/pyaerocom/aeroval/fairmode_stats.py @@ -7,7 +7,7 @@ - Develop harmonized set of tools to test whether or a not a model is fit for a given purpose - CAMS has to make use of FAIRMODE diagrams -This module contains methods to cmpute the relevant FAIRMODE statistics. +This module contains methods to compute the relevant FAIRMODE statistics. 
""" import numpy as np diff --git a/pyaerocom/aeroval/modelentry.py b/pyaerocom/aeroval/modelentry.py index d1f7a08de..4016f4a16 100644 --- a/pyaerocom/aeroval/modelentry.py +++ b/pyaerocom/aeroval/modelentry.py @@ -1,6 +1,11 @@ from copy import deepcopy -from pyaerocom._lowlevel_helpers import BrowseDict, DictStrKeysListVals, DictType, StrType +from pyaerocom._lowlevel_helpers import ( + BrowseDict, + DictStrKeysListVals, + DictType, + StrType, +) from pyaerocom.aeroval.aux_io_helpers import check_aux_info diff --git a/pyaerocom/aeroval/obsentry.py b/pyaerocom/aeroval/obsentry.py index 22f92beae..19615de8c 100644 --- a/pyaerocom/aeroval/obsentry.py +++ b/pyaerocom/aeroval/obsentry.py @@ -70,6 +70,8 @@ def __init__(self, **kwargs): self.is_superobs = False self.only_superobs = False + self.colocation_layer_limts = None + self.profile_layer_limits = None self.read_opts_ungridded = {} diff --git a/pyaerocom/aeroval/setupclasses.py b/pyaerocom/aeroval/setupclasses.py index fe48297a1..a09e22e36 100644 --- a/pyaerocom/aeroval/setupclasses.py +++ b/pyaerocom/aeroval/setupclasses.py @@ -39,7 +39,7 @@ class OutputPaths(ConstrainedContainer): """ - JSON_SUBDIRS = ["map", "ts", "ts/diurnal", "scat", "hm", "hm/ts", "contour"] + JSON_SUBDIRS = ["map", "ts", "ts/diurnal", "scat", "hm", "hm/ts", "contour", "profiles"] json_basedir = DirLoc( default=os.path.join(const.OUTPUTDIR, "aeroval/data"), @@ -104,13 +104,13 @@ class StatisticsSetup(ConstrainedContainer): Attributes ---------- weighted_stats : bool - if True, statistical parameters are calculated using area weights, + if True, statistics are calculated using area weights, this is only relevant for gridded / gridded evaluations. annual_stats_constrained : bool if True, then only sites are considered that satisfy a potentially specified annual resampling constraint (see :attr:`pyaerocom.colocation_auto.ColocationSetup.min_num_obs`). E.g. - lets say you want to calculate statistical parameters (bias, + lets say you want to calculate statistics (bias, correlation, etc.) for monthly model / obs data for a given site and year. 
Lets further say, that there are only 8 valid months of data, and 4 months are missing, so statistics will be calculated for that year diff --git a/pyaerocom/colocateddata.py b/pyaerocom/colocateddata.py index f9bd86c6f..c72baec5f 100644 --- a/pyaerocom/colocateddata.py +++ b/pyaerocom/colocateddata.py @@ -1,3 +1,4 @@ +from __future__ import annotations import logging import os import warnings @@ -1056,12 +1057,28 @@ def rename_variable(self, var_name, new_var_name, data_source, inplace=True): @staticmethod def _aerocom_savename( - obs_var, obs_id, mod_var, mod_id, start_str, stop_str, ts_type, filter_name + obs_var: str, + obs_id: str, + mod_var: str, + mod_id: str, + start_str: str, + stop_str: str, + ts_type: str, + filter_name: str, + vertical_layer: dict[str, float] | None = None, ): - return ( - f"{mod_var}_{obs_var}_MOD-{mod_id}_REF-{obs_id}_" - f"{start_str}_{stop_str}_{ts_type}_{filter_name}" - ) + if vertical_layer is not None: + start_layer = vertical_layer["start"] + end_layer = vertical_layer["end"] + return ( + f"{mod_var}_{obs_var}_MOD-{mod_id}_REF-{obs_id}_" + f"{start_str}_{stop_str}_{ts_type}_{filter_name}_{start_layer}-{end_layer}km" + ) + else: + return ( + f"{mod_var}_{obs_var}_MOD-{mod_id}_REF-{obs_id}_" + f"{start_str}_{stop_str}_{ts_type}_{filter_name}" + ) @property def savename_aerocom(self): diff --git a/pyaerocom/colocation.py b/pyaerocom/colocation.py index 8336ff9ed..b57826bc4 100644 --- a/pyaerocom/colocation.py +++ b/pyaerocom/colocation.py @@ -35,7 +35,7 @@ logger = logging.getLogger(__name__) -def _resolve_var_name(data): +def resolve_var_name(data): """ Check variable name of `GriddedData` against AeroCom default @@ -273,9 +273,9 @@ class can handle different methods with different inputs) # 1. match model data with potential input start / stop and update if # applicable - start, stop = _check_time_ival(data, start, stop) + start, stop = check_time_ival(data, start, stop) # 2. 
narrow it down with obsdata availability, if applicable - start, stop = _check_time_ival(data_ref, start, stop) + start, stop = check_time_ival(data_ref, start, stop) data = data.crop(time_range=(start, stop)) data_ref = data_ref.crop(time_range=(start, stop)) @@ -288,8 +288,8 @@ class can handle different methods with different inputs) files_ref = [os.path.basename(x) for x in data_ref.from_files] files = [os.path.basename(x) for x in data.from_files] - var, var_aerocom = _resolve_var_name(data) - var_ref, var_ref_aerocom = _resolve_var_name(data_ref) + var, var_aerocom = resolve_var_name(data) + var_ref, var_ref_aerocom = resolve_var_name(data_ref) meta = { "data_source": [data_ref.data_id, data.data_id], "var_name": [var_ref_aerocom, var_aerocom], @@ -350,7 +350,7 @@ class can handle different methods with different inputs) return coldata -def _check_time_ival(data, start, stop): +def check_time_ival(data, start, stop): # get start / stop of gridded data as pandas.Timestamp data_start = to_pandas_timestamp(data.start) data_stop = to_pandas_timestamp(data.stop) @@ -377,7 +377,7 @@ def _check_time_ival(data, start, stop): return start, stop -def _check_ts_type(data, ts_type): +def check_ts_type(data, ts_type): ts_type_data = TsType(data.ts_type) if ts_type is None: ts_type = ts_type_data @@ -684,7 +684,7 @@ def colocate_gridded_ungridded( except DimensionOrderError: data.reorder_dimensions_tseries() - var, var_aerocom = _resolve_var_name(data) + var, var_aerocom = resolve_var_name(data) if var_ref is None: var_ref = var_aerocom var_ref_aerocom = var_aerocom @@ -716,7 +716,7 @@ def colocate_gridded_ungridded( data = regfilter.apply(data) # check time overlap and crop model data if needed - start, stop = _check_time_ival(data, start, stop) + start, stop = check_time_ival(data, start, stop) data = data.crop(time_range=(start, stop)) if regrid_res_deg is not None: @@ -726,7 +726,7 @@ def colocate_gridded_ungridded( reduce_station_data_ts_type = ts_type ts_type_src_data = data.ts_type - ts_type, ts_type_data = _check_ts_type(data, ts_type) + ts_type, ts_type_data = check_ts_type(data, ts_type) if not colocate_time and ts_type < ts_type_data: data = data.resample_time(str(ts_type), min_num_obs=min_num_obs, how=resample_how) ts_type_data = ts_type @@ -748,6 +748,8 @@ def colocate_gridded_ungridded( lat_range = [np.min(latitude), np.max(latitude)] lon_range = [np.min(longitude), np.max(longitude)] # use only sites that are within model domain + + # filter_by_meta wipes is_vertical_profile data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range) # get timeseries from all stations in provided time resolution diff --git a/pyaerocom/colocation_3d.py b/pyaerocom/colocation_3d.py new file mode 100644 index 000000000..82a0b96b4 --- /dev/null +++ b/pyaerocom/colocation_3d.py @@ -0,0 +1,490 @@ +""" +Methods and / or classes to perform 3D colocation +""" +from __future__ import annotations + +import logging +import os +from typing import NamedTuple + +import iris +import numpy as np +from cf_units import Unit + +from pyaerocom import __version__ as pya_ver +from pyaerocom import const +from pyaerocom.colocateddata import ColocatedData +from pyaerocom.colocation import ( + _colocate_site_data_helper, + _colocate_site_data_helper_timecol, + _regrid_gridded, + check_time_ival, + check_ts_type, + resolve_var_name, +) +from pyaerocom.exceptions import ( + DataUnitError, + DimensionOrderError, + MetaDataError, + TemporalResolutionError, + VarNotAvailableError, +) +from pyaerocom.filter import 
Filter +from pyaerocom.helpers import make_datetime_index +from pyaerocom.tstype import TsType + +logger = logging.getLogger(__name__) + + +class ColocatedDataLists(NamedTuple): + colocateddata_for_statistics: list[ColocatedData] + colocateddata_for_profile_viz: list[ColocatedData] + + +def _colocate_vertical_profile_gridded( + data, + data_ref, + start=None, + stop=None, + filter_name=None, + harmonise_units=True, + var_ref=None, + min_num_obs=None, + colocate_time=False, + use_climatology_ref=False, + resample_how=None, + layer_limits: list[dict[str, float]] = None, + obs_stat_data=None, + ungridded_lons=None, + ungridded_lats=None, + col_freq=None, + col_tst=None, + var=None, + var_aerocom=None, + var_ref_aerocom=None, +) -> list[ColocatedData]: + if layer_limits is None: + raise ValueError("layer_limits must be provided") + + data_ref_unit = None + ts_type_src_ref = None + if not harmonise_units: + data_unit = str(data.units) + else: + data_unit = None + + pd_freq = col_tst.to_pandas_freq() + time_idx = make_datetime_index(start, stop, pd_freq) + + time_num = len(time_idx) + stat_num = len(obs_stat_data) + + arr = np.full((2, time_num, stat_num), np.nan) + + lons = np.full(stat_num, np.nan) + lats = np.full(stat_num, np.nan) + alts = np.full(stat_num, np.nan) + + station_names = [""] * stat_num + + dataset_ref = data_ref.contains_datasets[0] + ts_type_src_data = data.ts_type + + list_of_colocateddata_objects = [] + for vertical_layer in layer_limits: + # Think about efficiency here in terms of order of loops. Candidate for parallelism + # create the 2D layer data + arr = np.full((2, time_num, stat_num), np.nan) + try: + data_this_layer = ( + data.extract( + iris.Constraint( + coord_values={ + "altitude": lambda cell: vertical_layer["start"] + < cell + < vertical_layer["end"] + } + ) + ) + .collapsed("altitude", iris.analysis.MEAN) + .copy() + ) + except Exception as e: + logger.warning(f"No altitude in model data layer {vertical_layer}") + logger.debug(f"Raised: {e}") + continue + + grid_stat_data_this_layer = data_this_layer.to_time_series( + longitude=ungridded_lons, + latitude=ungridded_lats, + ) + + # loop over all stations and append to colocated data object + for i, obs_stat in enumerate(obs_stat_data): + # Add coordinates to arrays required for xarray.DataArray below + lons[i] = obs_stat.longitude + lats[i] = obs_stat.latitude + alts[i] = obs_stat.station_coords[ + "altitude" + ] # altitude refers to the altitude of the data; be explicit about where it comes from + station_names[i] = obs_stat.station_name + + # ToDo: consider removing to keep ts_type_src_ref (this was probably + # introduced for EBAS where the original data frequency is not constant + # but can vary from site to site) + if ts_type_src_ref is None: + ts_type_src_ref = obs_stat["ts_type_src"] + elif obs_stat["ts_type_src"] != ts_type_src_ref: + spl = ts_type_src_ref.split(";") + if obs_stat["ts_type_src"] not in spl: + spl.append(obs_stat["ts_type_src"]) + ts_type_src_ref = ";".join(spl) + + if data_ref_unit is None: + try: + data_ref_unit = obs_stat["var_info"][var_ref]["units"] + except KeyError as e: # variable information or unit is not defined + logger.exception(repr(e)) + try: + unit = obs_stat["var_info"][var_ref]["units"] + except Exception: + unit = None + if unit != data_ref_unit: + raise ValueError( + f"Cannot perform colocation. 
" + f"Ungridded data object contains different units ({var_ref})" + ) + # get observations (Note: the index of the observation time series + # is already in the specified frequency format, and thus, does not + # need to be updated, for details (or if errors occur), cf. + # UngriddedData.to_station_data, where the conversion happens) + + # get model station data + grid_stat_this_layer = grid_stat_data_this_layer[i] + + # Make a copy of the station data and resample it to the mean based on hourly resolution. Needs testing! + obs_stat_this_layer = obs_stat.copy() + + try: + obs_stat_this_layer[var_ref] = obs_stat_this_layer.select_altitude( + var_name=var_ref, altitudes=list(vertical_layer.values()) + ).mean( + "altitude", skipna=True # very important to skip nans here + ) + except ValueError: + logger.warning( + f"Var: {var_ref}. Skipping {obs_stat_this_layer.station_name} in altitude layer {vertical_layer} because no data" + ) + continue + + if harmonise_units: + grid_unit = grid_stat_this_layer.get_unit(var) + obs_unit = obs_stat_this_layer.get_unit(var_ref) + if not grid_unit == obs_unit: + grid_stat_this_layer.convert_unit(var, obs_unit) + if data_unit is None: + data_unit = obs_unit + + try: + if colocate_time: + _df = _colocate_site_data_helper_timecol( + stat_data=grid_stat_this_layer, + stat_data_ref=obs_stat_this_layer, + var=var, + var_ref=var_ref, + ts_type=col_freq, + resample_how=resample_how, + min_num_obs=min_num_obs, + use_climatology_ref=use_climatology_ref, + ) + else: + _df = _colocate_site_data_helper( + stat_data=grid_stat_this_layer, + stat_data_ref=obs_stat_this_layer, + var=var, + var_ref=var_ref, + ts_type=col_freq, + resample_how=resample_how, + min_num_obs=min_num_obs, + use_climatology_ref=use_climatology_ref, + ) + + try: + # assign the unified timeseries data to the colocated data array + arr[0, :, i] = _df["ref"].values + arr[1, :, i] = _df["data"].values + except ValueError: + try: + mask = _df.index.intersection(time_idx) + _df = _df.loc[mask] + arr[0, :, i] = _df["ref"].values + arr[1, :, i] = _df["data"].values + except ValueError as e: + logger.warning( + f"Failed to colocate time for station {obs_stat.station_name}. " + f"This station will be skipped (error: {e})" + ) + except TemporalResolutionError as e: + # resolution of obsdata is too low + logger.warning( + f"{var_ref} data from site {obs_stat.station_name} will " + f"not be added to ColocatedData. 
Reason: {e}" + ) + + try: + revision = data_ref.data_revision[dataset_ref] + except Exception: + try: + revision = data_ref._get_data_revision_helper(dataset_ref) + except MetaDataError: + revision = "MULTIPLE" + except Exception: + revision = "n/a" + + files = [os.path.basename(x) for x in data.from_files] + + meta = { + "data_source": [dataset_ref, data.data_id], + "var_name": [var_ref_aerocom, var_aerocom], + "var_name_input": [var_ref, var], + "ts_type": col_freq, # will be updated below if resampling + "filter_name": filter_name, + "ts_type_src": [ts_type_src_ref, ts_type_src_data], + "var_units": [data_ref_unit, data_unit], + "data_level": 3, + "revision_ref": revision, + "from_files": files, + "from_files_ref": None, + "colocate_time": colocate_time, + "obs_is_clim": use_climatology_ref, + "pyaerocom": pya_ver, + "min_num_obs": min_num_obs, + "resample_how": resample_how, + "vertical_layer": vertical_layer, + } + + # create coordinates of DataArray + coords = { + "data_source": meta["data_source"], + "time": time_idx, + "station_name": station_names, + "latitude": ("station_name", lats), + "longitude": ("station_name", lons), + "altitude": ("station_name", alts), + } + + dims = ["data_source", "time", "station_name"] + coldata = ColocatedData(data=arr, coords=coords, dims=dims, name=var, attrs=meta) + + # add correct units for lat / lon dimensions + coldata.latitude.attrs["standard_name"] = data.latitude.standard_name + coldata.latitude.attrs["units"] = str(data.latitude.units) + + coldata.longitude.attrs["standard_name"] = data.longitude.standard_name + coldata.longitude.attrs["units"] = str(data.longitude.units) + + coldata.data.attrs["altitude_units"] = str(data.altitude.units) + + coldata.vertical_layer = vertical_layer + + list_of_colocateddata_objects.append(coldata) + + return list_of_colocateddata_objects + + +def colocate_vertical_profile_gridded( + data, + data_ref, + ts_type: str = None, + start: str | None = None, + stop: str | None = None, + filter_name: str = None, + regrid_res_deg: int | dict | None = None, + harmonise_units: bool = True, + regrid_scheme: str = "areaweighted", + var_ref: str = None, + update_baseyear_gridded: int = None, + min_num_obs: int | dict | None = None, + colocate_time: bool = False, + use_climatology_ref: bool = False, + resample_how: str | dict = None, + colocation_layer_limits: list[dict] = None, + profile_layer_limits: list[dict] = None, + **kwargs, +) -> ColocatedDataLists: + """ + Colocated vertical profile data with gridded (model) data + + The guts of this function are placed in a helper function as not to repeat the code. + This is done because colocation must occur twice: + i) at the the statistics are computed + ii) at a finder vertical resoltuion for profile vizualization + Some things you do not want to compute twice, however. + So (most of) the things that correspond to both colocation instances are computed here, + and then passed to the helper function. 
+ + Returns + ------- + colocated_data_lists : ColocatedDataLists + """ + + if filter_name is None: + filter_name = const.DEFAULT_REG_FILTER + try: + data.check_dimcoords_tseries() + except DimensionOrderError: + data.reorder_dimensions_tseries() + + var, var_aerocom = resolve_var_name(data) + if var_ref is None: + var_ref = var_aerocom + var_ref_aerocom = var_aerocom + else: + var_ref_aerocom = const.VARS[var_ref].var_name_aerocom + + if var_ref not in data_ref.contains_vars: + raise VarNotAvailableError( + f"Variable {var_ref} is not available in ungridded " + f"data (which contains {data_ref.contains_vars})" + ) + elif len(data_ref.contains_datasets) > 1: + raise AttributeError( + f"Colocation can only be performed with ungridded data objects " + f"that only contain a single dataset (input data contains: " + f"{data_ref.contains_datasets}). Use method `extract_dataset` of " + f"UngriddedData object to extract single datasets." + ) + + if any( + not {"start", "end"}.issubset(layer) + for layer in colocation_layer_limits + profile_layer_limits + ): + raise KeyError( + "start and end must be provided for profiles in each vertical layer in colocate_vertical_profile_gridded" + ) + + data_ref_meta_idxs_with_var_info = [] + for i in range(len(data_ref.metadata)): + if "altitude" not in data_ref.metadata[i]["var_info"]: + logger.warning( + f"Station {data_ref.metadata[i]['station_name']} has no altitude in var_info" + ) + else: + data_ref_meta_idxs_with_var_info.append(i) + + if any( + data.altitude.units != Unit(data_ref.metadata[i]["var_info"]["altitude"]["units"]) + for i in data_ref_meta_idxs_with_var_info + ): + logger.info( + f"Mismatching altitude units: model has {data.altitude.units}, but not all observations share this unit." + ) + raise DataUnitError + + if update_baseyear_gridded is not None: + # update time dimension in gridded data + data.base_year = update_baseyear_gridded + + # apply region filter to data + regfilter = Filter(name=filter_name) + data_ref = regfilter.apply(data_ref) + data = regfilter.apply(data) + + # check time overlap and crop model data if needed + start, stop = check_time_ival(data, start, stop) + data = data.crop(time_range=(start, stop)) + + if regrid_res_deg is not None: # pragma: no cover + data = _regrid_gridded(data, regrid_scheme, regrid_res_deg) + + # Special ts_type for which all stations with a lower (coarser) ts_type are removed + reduce_station_data_ts_type = ts_type + + # ts_type_src_data = data.ts_type + ts_type, ts_type_data = check_ts_type(data, ts_type) + if not colocate_time and ts_type < ts_type_data: + data = data.resample_time(str(ts_type), min_num_obs=min_num_obs, how=resample_how) + ts_type_data = ts_type + + if use_climatology_ref: # pragma: no cover + col_freq = "monthly" + obs_start = const.CLIM_START + obs_stop = const.CLIM_STOP + else: + col_freq = str(ts_type) + obs_start = start + obs_stop = stop + + # colocation frequency + col_tst = TsType(col_freq) + + latitude = data.latitude.points + longitude = data.longitude.points + altitude = data.altitude.points + lat_range = [np.min(latitude), np.max(latitude)] + lon_range = [np.min(longitude), np.max(longitude)] + alt_range = [np.min(altitude), np.max(altitude)] + # use only sites that are within model domain + + # filter_by_meta wipes is_vertical_profile + # Also note that filter_by_meta may not be calling alt_range. 
Function filter_altitude is defined but not used + data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range, altitude=alt_range) + + # get timeseries from all stations in provided time resolution + # (time resampling is done below in main loop) + all_stats = data_ref.to_station_data_all( + vars_to_convert=var_ref, + start=obs_start, + stop=obs_stop, + by_station_name=True, + ts_type_preferred=reduce_station_data_ts_type, + **kwargs, + ) + + if len(all_stats["stats"]) == 0: + raise VarNotAvailableError( + f"Variable {var_ref} is not available in specified time interval ({start}-{stop})" + ) + + # Colocation has to occur twice for vertical profiles. + # Once for the colocation over which we will compute the statistics. + # The second time is just to show the vertical profiles on the web; this needs finer vertical resolution. + # Here we make a list containing the lists of ColocatedData objects for both colocation purposes + output_prep = [ + _colocate_vertical_profile_gridded( + data=data, + data_ref=data_ref, + start=start, + stop=stop, + filter_name=filter_name, + harmonise_units=harmonise_units, + var_ref=var_ref, + min_num_obs=min_num_obs, + colocate_time=colocate_time, + use_climatology_ref=use_climatology_ref, + resample_how=resample_how, + layer_limits=layer_limits, + obs_stat_data=all_stats["stats"], + ungridded_lons=all_stats["longitude"], + ungridded_lats=all_stats["latitude"], + col_freq=col_freq, + col_tst=col_tst, + var=var, + var_aerocom=var_aerocom, + var_ref_aerocom=var_ref_aerocom, + ) + for layer_limits in [colocation_layer_limits, profile_layer_limits] + ] + # Create a namedtuple for output. + # Each element in the tuple is a list of ColocatedData objects. + # The length of these lists is the same as the number of colocation layers + + for coldata in output_prep[1]: + coldata.data.attrs["just_for_viz"] = 1 + + colocated_data_lists = ColocatedDataLists( + *output_prep + ) # put the list of prepared output into namedtuple object s.t. 
both position and named arguments can be used + + return colocated_data_lists diff --git a/pyaerocom/colocation_auto.py b/pyaerocom/colocation_auto.py index 4b3aa2721..b0ed80e11 100644 --- a/pyaerocom/colocation_auto.py +++ b/pyaerocom/colocation_auto.py @@ -9,6 +9,8 @@ from datetime import datetime from pathlib import Path +from cf_units import Unit + if sys.version_info >= (3, 10): # pragma: no cover from importlib import metadata else: # pragma: no cover @@ -24,6 +26,7 @@ colocate_gridded_ungridded, correct_model_stp_coldata, ) +from pyaerocom.colocation_3d import ColocatedDataLists, colocate_vertical_profile_gridded from pyaerocom.config import ALL_REGION_NAME from pyaerocom.exceptions import ColocationError, ColocationSetupError, DataCoverageError from pyaerocom.helpers import ( @@ -349,6 +352,9 @@ def __init__( self.obs_vert_type = None self.obs_ts_type_read = None self.obs_filters = {} + self._obs_is_vertical_profile = False + self.colocation_layer_limits = None + self.profile_layer_limits = None self.read_opts_ungridded = {} @@ -568,6 +574,17 @@ def obs_is_ungridded(self): """ return True if self.obs_id in get_all_supported_ids_ungridded() else False + @property + def obs_is_vertical_profile(self): + """ + bool: True if obs_id refers to a VerticalProfile, else False + """ + return self._obs_is_vertical_profile + + @obs_is_vertical_profile.setter + def obs_is_vertical_profile(self, value): + self._obs_is_vertical_profile = value + @property def model_reader(self): """ @@ -798,7 +815,9 @@ def run(self, var_list: list = None, **opts): self._print_coloc_info(vars_to_process) for mod_var, obs_var in vars_to_process.items(): try: - coldata = self._run_helper(mod_var, obs_var) + coldata = self._run_helper( + mod_var, obs_var + ) # note this can be ColocatedData or ColocatedDataLists if not mod_var in data_out: data_out[mod_var] = {} data_out[mod_var][obs_var] = coldata @@ -1272,7 +1291,26 @@ def _save_coldata(self, coldata): coldata.rename_variable(mod_var, mvar, self.model_id) else: mvar = mod_var - savename = self._coldata_savename(obs_var, mvar, coldata.ts_type) + + if hasattr(coldata, "vertical_layer"): + # save colocated vertical layer netCDF files with vertical layers in km + if Unit(coldata.data.altitude_units) != Unit("km"): + start = Unit(coldata.data.altitude_units).convert( + coldata.vertical_layer["start"], other="km" + ) + end = Unit(coldata.data.altitude_units).convert( + coldata.vertical_layer["end"], other="km" + ) + vertical_layer = {"start": start, "end": end} + else: + vertical_layer = coldata.vertical_layer + + savename = self._coldata_savename( + obs_var, mvar, coldata.ts_type, vertical_layer=vertical_layer + ) + + else: + savename = self._coldata_savename(obs_var, mvar, coldata.ts_type) fp = coldata.to_netcdf(self.output_dir, savename=savename) self.files_written.append(fp) msg = f"WRITE: {fp}\n" @@ -1326,8 +1364,12 @@ def _check_set_start_stop(self): ) self.start, self.stop = start_stop(self.start, self.stop) - def _coldata_savename(self, obs_var, mod_var, ts_type): + def _coldata_savename(self, obs_var, mod_var, ts_type, **kwargs): """Get filename of colocated data file for saving""" + vertical_layer = kwargs.get("vertical_layer", None) name = ColocatedData._aerocom_savename( obs_var=obs_var, obs_id=self.get_obs_name(), @@ -1337,6 +1379,7 @@ def _coldata_savename(self, obs_var, mod_var, ts_type): stop_str=self.get_stop_str(), ts_type=ts_type, filter_name=self.filter_name, + vertical_layer=vertical_layer, ) 
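[reviewer note] With the new `vertical_layer` argument, colocated netCDF names gain a layer suffix. Calling `_aerocom_savename` directly shows the effect (IDs are invented; `ALL-wMOUNTAINS` is the default region filter):

```python
name = ColocatedData._aerocom_savename(
    obs_var="ec532aer", obs_id="EARLINET",      # obs as used in this PR
    mod_var="ec532aer", mod_id="SomeModel",     # hypothetical model ID
    start_str="20190101", stop_str="20191231",
    ts_type="daily", filter_name="ALL-wMOUNTAINS",
    vertical_layer={"start": 0.5, "end": 2.0},  # already converted to km by _save_coldata
)
# -> 'ec532aer_ec532aer_MOD-SomeModel_REF-EARLINET_20190101_20191231_daily_ALL-wMOUNTAINS_0.5-2.0km'
```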
return f"{name}.nc" @@ -1357,14 +1400,21 @@ def _colocation_func(self): function the performs co-location operation """ + + if self.obs_is_vertical_profile: + return colocate_vertical_profile_gridded if self.obs_is_ungridded: return colocate_gridded_ungridded else: return colocate_gridded_gridded - def _prepare_colocation_args(self, model_var, obs_var): + def _prepare_colocation_args(self, model_var: str, obs_var: str): model_data = self.get_model_data(model_var) obs_data = self.get_obs_data(obs_var) + + if getattr(obs_data, "is_vertical_profile", None): + self.obs_is_vertical_profile = obs_data.is_vertical_profile + rshow = self._eval_resample_how(model_var, obs_var) if self.model_use_climatology: @@ -1393,6 +1443,11 @@ def _prepare_colocation_args(self, model_var, obs_var): else: ts_type = self._get_colocation_ts_type(model_data.ts_type, obs_data.ts_type) args.update(ts_type=ts_type) + if self.obs_is_vertical_profile: + args.update( + colocation_layer_limits=self.colocation_layer_limits, + profile_layer_limits=self.profile_layer_limits, + ) return args def _check_dimensionality(self, args): @@ -1404,10 +1459,6 @@ def _check_dimensionality(self, args): if mdata.ndim == 4 and self.obs_vert_type == "Surface": mdata = mdata.extract_surface_level() args["data"] = mdata - elif mdata.ndim > 3: - raise DataDimensionError( - f"cannot co-locate model data with more than 3 dimensions: {mdata}" - ) if isinstance(odata, GriddedData): if odata.ndim == 4 and self.obs_vert_type == "Surface": @@ -1419,23 +1470,44 @@ def _check_dimensionality(self, args): ) return args - def _run_helper(self, model_var, obs_var): + def _run_helper(self, model_var: str, obs_var: str): logger.info(f"Running {self.model_id} ({model_var}) vs. {self.obs_id} ({obs_var})") args = self._prepare_colocation_args(model_var, obs_var) args = self._check_dimensionality(args) coldata = self._colocation_func(**args) - coldata.data.attrs["model_name"] = self.get_model_name() - coldata.data.attrs["obs_name"] = self.get_obs_name() - coldata.data.attrs["vert_code"] = self.obs_vert_type - coldata.data.attrs.update(**self.add_meta) - - if self.zeros_to_nan: - coldata = coldata.set_zeros_nan() - if self.model_to_stp: - coldata = correct_model_stp_coldata(coldata) - if self.save_coldata: - self._save_coldata(coldata) + if isinstance(coldata, ColocatedData): + coldata.data.attrs["model_name"] = self.get_model_name() + coldata.data.attrs["obs_name"] = self.get_obs_name() + coldata.data.attrs["vert_code"] = self.obs_vert_type + + coldata.data.attrs.update(**self.add_meta) + + if self.zeros_to_nan: + coldata = coldata.set_zeros_nan() + if self.model_to_stp: + coldata = correct_model_stp_coldata(coldata) + if self.save_coldata: + self._save_coldata(coldata) + + elif isinstance(coldata, ColocatedDataLists): # look into intertools chain.from_iterable + for i_list in coldata: + for coldata_obj in i_list: + coldata_obj.data.attrs["model_name"] = self.get_model_name() + coldata_obj.data.attrs["obs_name"] = self.get_obs_name() + coldata_obj.data.attrs["vert_code"] = self.obs_vert_type + coldata_obj.data.attrs.update(**self.add_meta) + if self.zeros_to_nan: + coldata_obj = coldata_obj.set_zeros_nan() + if self.model_to_stp: # TODO: check is this needs modifying + coldata = correct_model_stp_coldata(coldata_obj) + if self.save_coldata: + self._save_coldata(coldata_obj) + + else: + raise Exception( + f"Invalid coldata type returned by colocation function {self._colocation_func}" + ) return coldata diff --git a/pyaerocom/data/paths.ini 
b/pyaerocom/data/paths.ini index a8c951138..40496948b 100644 --- a/pyaerocom/data/paths.ini +++ b/pyaerocom/data/paths.ini @@ -84,7 +84,7 @@ AERONET_INV_V3L2_DAILY = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/Aeronet.Inv # other observations EBAS_MULTICOLUMN = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/EBASMultiColumn/data EEA = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/EEA_AQeRep/renamed -EARLINET = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/Export/Earlinet/CAMS/data +EARLINET = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/EarlinetV2/data GAWTADsubsetAasEtAl = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/PYAEROCOM/GAWTADSulphurSubset/data DMS_AMS_CVO = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/PYAEROCOM/DMS_AMS_CVO/data GHOST_EEA_DAILY = ${BASEDIR}/aerocom/aerocom1/AEROCOM_OBSDATA/GHOST/data/EEA_AQ_eReporting/daily diff --git a/pyaerocom/data/variables.ini b/pyaerocom/data/variables.ini index b70c0df95..b7eeaddbe 100644 --- a/pyaerocom/data/variables.ini +++ b/pyaerocom/data/variables.ini @@ -243,20 +243,38 @@ use = abs550aer [ec550aer] description = Aerosol Extinction coefficient at 550nm wavelength_nm = 550 -unit = 1/Mm +unit = 1/km standard_name = volume_extinction_coefficient_in_air_due_to_ambient_aerosol_particles var_type = radiative properties -minimum = -10 -maximum = 1000 +minimum = -0.1 +maximum = 1 map_cmap = Blues -map_cbar_levels = [0, 4, 8, 12, 16, 20, 40, 60, 80, 100, 200, 300, 400] +map_cbar_levels = [0, 0.004, 0.008, 0.012, 0.016, 0.02, 0.04, 0.06, 0.08, 0.1, 0.2, 0.3, 0.4] dimensions = time, lev, lat, lon comments_and_purpose = Evaluation of the model Aerosol extinction profiles from CALIOP [ec532aer] description = Aerosol Extinction coefficient at 532nm +unit = 1/km wavelength_nm = 532 use = ec550aer +minimum = -0.1 +maximum = 1 +map_cmap = Blues +map_cbar_levels = [0, 0.004, 0.008, 0.012, 0.016, 0.02, 0.04, 0.06, 0.08, 0.1, 0.2, 0.3, 0.4] + +[ec355aer] +description = Aerosol Extinction coefficient at 355nm +wavelength_nm = 355 +unit = 1/km +standard_name = volume_extinction_coefficient_in_air_due_to_ambient_aerosol_particles +var_type = radiative properties +minimum = -0.1 +maximum = 1 +map_cmap = Blues +map_cbar_levels = [0, 0.004, 0.008, 0.012, 0.016, 0.02, 0.04, 0.06, 0.08, 0.1, 0.2, 0.3, 0.4] +dimensions = time, lev, lat, lon +comments_and_purpose = Evaluation of the model Aerosol extinction profiles from EARLINET [sc550aer] description = Aerosol light scattering coefficient at 550 nm @@ -332,29 +350,39 @@ map_cbar_levels = [0, 4, 8, 12, 16, 20, 40, 60, 80, 100, 200, 300, 400] [bsc550aer] description = Aerosol light backscattering coefficient at 550 nm wavelength_nm = 550 -unit = Mm-1 sr-1 +unit = km-1 sr-1 +minimum = -1000 +maximum = 1000 [bsc550dryaer] description = Dry aerosol light backscattering coefficient at 550 nm (RH<40) wavelength_nm = 550 unit = Mm-1 sr-1 +minimum = -1000 +maximum = 1000 [bsc532aer] description = Aerosol light backscattering coefficient at 532 nm wavelength_nm = 532 -unit = Mm-1 sr-1 +unit = km-1 sr-1 +minimum = -1000 +maximum = 1000 [bsc355aer] var_name = bsc355aer description = Aerosol light backscattering coefficient at 355 nm wavelength_nm = 355 -unit = Mm-1 sr-1 +unit = km-1 sr-1 +minimum = -1000 +maximum = 1000 [bsc1064aer] var_name = bsc1064aer description = Aerosol light backscattering coefficient at 1064 nm wavelength_nm = 1064 -unit = Mm-1 sr-1 +unit = km-1 sr-1 +minimum = -1000 +maximum = 1000 [ac550aer] description = Aerosol light absorption coefficient at 550 nm diff --git a/pyaerocom/extras/satellite_l2/aeolus_l2a.py 
b/pyaerocom/extras/satellite_l2/aeolus_l2a.py index 594dff534..e95141f50 100755 --- a/pyaerocom/extras/satellite_l2/aeolus_l2a.py +++ b/pyaerocom/extras/satellite_l2/aeolus_l2a.py @@ -2398,7 +2398,6 @@ def plot_profile_v2( except ValueError: # this happens when height_data and var_data have only one entry # set out_arr[time_step_idx,:] to NaN in this case for now - # breakpoint() out_arr[time_step_idx, :] = np.nan # if nansum > 0: @@ -2724,7 +2723,6 @@ def plot_profile_v3( except ValueError: # this happens when height_data and var_data have only one entry # set out_arr[time_step_idx,:] to NaN in this case for now - # breakpoint() out_arr[time_step_idx, :] = np.nan elif nansum == 0: diff --git a/pyaerocom/griddeddata.py b/pyaerocom/griddeddata.py index da48f9a73..8fc1199f2 100644 --- a/pyaerocom/griddeddata.py +++ b/pyaerocom/griddeddata.py @@ -1141,6 +1141,7 @@ def to_time_series( f"Extracting timeseries data from large array (shape: {self.shape}). " f"This may take a while..." ) + # if the method makes it to this point, it is 3 or 4 dimensional # and the first 3 dimensions are time, latitude, longitude. if self.ndim == 3: # data does not contain vertical dimension @@ -1162,6 +1163,7 @@ def to_time_series( if sample_points is None: sample_points = self._coords_to_iris_sample_points(**coords) + return self._to_timeseries_3D( sample_points, scheme, @@ -1317,7 +1319,12 @@ def _to_timeseries_3D( data = self._apply_vert_scheme(sample_points, vert_scheme) # ToDo: check if _to_timeseries_2D can be called here - return data.to_time_series(sample_points, scheme, collapse_scalar, add_meta=add_meta) + return data.to_time_series( + sample_points=sample_points, + scheme=scheme, + collapse_scalar=collapse_scalar, + add_meta=add_meta, + ) def _apply_vert_scheme(self, sample_points, vert_scheme): """Helper method that checks and infers vertical scheme for time @@ -1349,7 +1356,7 @@ def _apply_vert_scheme(self, sample_points, vert_scheme): "Cannot yet retrieve timeseries at altitude levels. Coming soon..." 
) elif vert_scheme == "profile": - raise NotImplementedError("Cannot yet retrieve profile timeseries") + return self else: try: # check if vertical scheme can be converted into valid iris diff --git a/pyaerocom/io/fileconventions.py b/pyaerocom/io/fileconventions.py index 10a338b42..789441f4a 100644 --- a/pyaerocom/io/fileconventions.py +++ b/pyaerocom/io/fileconventions.py @@ -71,7 +71,12 @@ def info_init(self): extracted from filenames """ return dict( - year=None, var_name=None, ts_type=None, vert_code="", is_at_stations=False, data_id="" + year=None, + var_name=None, + ts_type=None, + vert_code="", + is_at_stations=False, + data_id="", ) def from_file(self, file): diff --git a/pyaerocom/io/iris_io.py b/pyaerocom/io/iris_io.py index d54360e4a..b00b78731 100644 --- a/pyaerocom/io/iris_io.py +++ b/pyaerocom/io/iris_io.py @@ -75,6 +75,7 @@ def load_cubes_custom(files, var_name=None, file_convention=None, perform_fmt_ch """ cubes = [] loaded_files = [] + for i, _file in enumerate(files): try: cube = load_cube_custom( @@ -122,6 +123,7 @@ def load_cube_custom(file, var_name=None, file_convention=None, perform_fmt_chec file = str(file) # iris load does not like PosixPath if perform_fmt_checks is None: perform_fmt_checks = const.GRID_IO.PERFORM_FMT_CHECKS + cube_list = iris.load(file) cube = None if var_name is None: diff --git a/pyaerocom/io/read_earlinet.py b/pyaerocom/io/read_earlinet.py index 0808afb03..5a349d12b 100755 --- a/pyaerocom/io/read_earlinet.py +++ b/pyaerocom/io/read_earlinet.py @@ -4,6 +4,7 @@ import re import numpy as np +import pandas as pd import xarray from pyaerocom import const @@ -25,7 +26,7 @@ class ReadEarlinet(ReadUngriddedBase): _FILEMASK = "*.*" #: version log of this class (for caching) - __version__ = "0.15_" + ReadUngriddedBase.__baseversion__ + __version__ = "0.16_" + ReadUngriddedBase.__baseversion__ #: Name of dataset (OBS_ID) DATA_ID = const.EARLINET_NAME @@ -34,7 +35,7 @@ class ReadEarlinet(ReadUngriddedBase): SUPPORTED_DATASETS = [const.EARLINET_NAME] #: default variables for read method - DEFAULT_VARS = ["ec532aer"] + DEFAULT_VARS = ["bsc532aer", "ec532aer"] #: all data values that exceed this number will be set to NaN on read. This #: is because iris, xarray, etc. assign a FILL VALUE of the order of e36 @@ -42,65 +43,69 @@ class ReadEarlinet(ReadUngriddedBase): _MAX_VAL_NAN = 1e6 #: variable name of altitude in files - ALTITUDE_ID = "Altitude" + ALTITUDE_ID = "altitude" #: temporal resolution - TS_TYPE = "native" + # Note: This is an approximation based on the fact that MOST of the data appears to be collected + # at an hourly resolution. 
Some files are a little less, but typically this is the case + TS_TYPE = "hourly" #: dictionary specifying the file search patterns for each variable + # VAR_PATTERNS_FILE = { + # "ec532aer": "*.e532", + # "ec355aer": "*.e355", + # "bsc532aer": "*.b532", + # "bsc355aer": "*.b355", + # "bsc1064aer": "*.b1064", + # "zdust": "*.e*", + # } + VAR_PATTERNS_FILE = { - "ec532aer": "*.e532", - "ec355aer": "*.e355", - "bsc532aer": "*.b532", - "bsc355aer": "*.b355", - "bsc1064aer": "*.b1064", - "zdust": "*.e*", + "ec532aer": "_Lev02_e0532", + "ec355aer": "_Lev02_e0355", + "bsc532aer": "_Lev02_b0532", + "bsc355aer": "_Lev02_b0355", + "bsc1064aer": "_Lev02_b1064", + # "zdust": "*.e*", # not sure if EARLINET has this anymore } #: dictionary specifying the file column names (values) for each Aerocom #: variable (keys) VAR_NAMES_FILE = { - "ec532aer": "Extinction", - "ec355aer": "Extinction", - "ec1064aer": "Extinction", - "bsc532aer": "Backscatter", - "bsc355aer": "Backscatter", - "bsc1064aer": "Backscatter", - "zdust": "DustLayerHeight", + "ec532aer": "extinction", + "ec355aer": "extinction", + "ec1064aer": "extinction", + "bsc532aer": "backscatter", + "bsc355aer": "backscatter", + "bsc1064aer": "backscatter", + "zdust": "DustLayerHeight", # not sure if EARLINET has this anymore } - #: metadata names that are supposed to be imported META_NAMES_FILE = dict( - location="Location", - start_date="StartDate", - start_utc="StartTime_UT", - stop_utc="StopTime_UT", - longitude="Longitude_degrees_east", - latitude="Latitude_degrees_north", - wavelength_emis="EmissionWavelength_nm", - wavelength_det="DetectionWavelength_nm", - res_raw_m="ResolutionRaw_meter", - zenith_ang_deg="ZenithAngle_degrees", - instrument_name="System", - comments="Comments", - shots_avg="ShotsAveraged", - detection_mode="DetectionMode", - res_eval="ResolutionEvaluated", - input_params="InputParameters", - altitude="Altitude_meter_asl", - eval_method="EvaluationMethod", + location="location", + start_utc="measurement_start_datetime", + stop_utc="measurement_stop_datetime", + # wavelength_det="DetectionWavelength_nm", + # res_raw_m="ResolutionRaw_meter", + instrument_name="system", + comment="comment", + PI="PI", + dataset_name="title", + # station_name="station_ID", + website="references", + wavelength_emis="wavelength", + # detection_mode="DetectionMode", + # res_eval="ResolutionEvaluated", + # input_params="InputParameters", + altitude="altitude", + # eval_method="backscatter_evaluation_method", ) - #: metadata keys that are needed for reading (must be values in #: :attr:`META_NAMES_FILE`) META_NEEDED = [ - "Location", - "StartDate", - "StartTime_UT", - "StopTime_UT", - "Longitude_degrees_east", - "Latitude_degrees_north", - "Altitude_meter_asl", + "location", + "measurement_start_datetime", + "measurement_start_datetime", ] #: Metadata keys from :attr:`META_NAMES_FILE` that are additional to @@ -108,27 +113,25 @@ class ReadEarlinet(ReadUngriddedBase): #: to be inserted into :class:`UngriddedData` object created in :func:`read` KEEP_ADD_META = [ "location", - "wavelength_emis", - "wavelength_det", - "res_raw_m", - "zenith_ang_deg", - "comments", - "shots_avg", - "detection_mode", - "res_eval", - "input_params", - "eval_method", + "wavelength", + "zenith_angle", + "comment", + "shots", + "backscatter_evaluation_method", ] #: Attribute access names for unit reading of variable data VAR_UNIT_NAMES = dict( - Extinction=["ExtinctionUnits", "units"], - Backscatter=["BackscatterUnits", "units"], - DustLayerHeight=["units"], - Altitude="units", + 
+        extinction=["units"],
+        backscatter=["units"],
+        dustlayerheight=["units"],
+        altitude="units",
    )

    #: Variable names of uncertainty data
-    ERR_VARNAMES = dict(ec532aer="ErrorExtinction", ec355aer="ErrorExtinction")
+    ERR_VARNAMES = dict(
+        ec532aer="error_extinction",
+        ec355aer="error_extinction",
+    )

    #: If true, the uncertainties are also read (where available, cf. ERR_VARNAMES)
    READ_ERR = True
@@ -166,6 +169,8 @@ def __init__(self, data_id=None, data_dir=None):
        #: files that were actually excluded from reading
        self.excluded_files = []

+        self.is_vertical_profile = True
+
    def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outliers=True):
        """Read EARLINET file and return it as instance of :class:`StationData`
@@ -197,7 +202,7 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
            if (
                var in self.VAR_PATTERNS_FILE
            ):  # make sure to only read what is supported by this file
-                if fnmatch.fnmatch(filename, self.VAR_PATTERNS_FILE[var]):
+                if self.VAR_PATTERNS_FILE[var] in filename:
                    _vars.append(var)
            elif var in self.AUX_REQUIRES:
                _vars.append(var)
@@ -209,7 +214,9 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
        # create empty data object (is dictionary with extended functionality)
        data_out = StationData()

-        data_out["station_id"] = filename.split("/")[-2]
+        data_out["station_id"] = filename.split("/")[-1].split("_")[
+            2
+        ]  # loses some generality, but works for the current file naming; if needed, the ID can also be read from the file (data_in.station_ID)
        data_out["data_id"] = self.data_id
        data_out["ts_type"] = self.TS_TYPE
@@ -223,8 +230,27 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
        # Iterate over the lines of the file
        self.logger.debug(f"Reading file {filename}")

-        data_in = xarray.open_dataset(filename)
-
+        data_in = xarray.open_dataset(filename, engine="netcdf4")
+
+        # get the coordinates, since they are no longer in the metadata;
+        # they are also put in the attributes, though it is unclear why they appear twice
+        data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64(
+            data_in["longitude"].values
+        )
+        data_out["station_coords"]["latitude"] = data_out["latitude"] = np.float64(
+            data_in["latitude"].values
+        )
+        data_out["altitude"] = np.float64(
+            data_in[
+                "altitude"
+            ].values  # altitude is defined in EARLINET in terms of altitude above sea level
+        )  # note: altitude is an array for the data; the station altitude is a different quantity
+        data_out["station_coords"]["altitude"] = np.float64(data_in.station_altitude)
+        data_out["altitude_attrs"] = data_in[
+            "altitude"
+        ].attrs  # get attrs for altitude units + extra
+
+        # get the intersection of metadata in data_out and data_in
        for k, v in self.META_NAMES_FILE.items():
            if v in self.META_NEEDED:
                _meta = data_in.attrs[v]
@@ -235,25 +261,20 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
                _meta = None
            data_out[k] = _meta

-        data_out["station_name"] = re.split(r"\s|,", data_out["location"])[0].strip()
-
-        str_dummy = str(data_in.StartDate)
-        year, month, day = str_dummy[0:4], str_dummy[4:6], str_dummy[6:8]
-
-        str_dummy = str(data_in.StartTime_UT).zfill(6)
-        hours, minutes, seconds = str_dummy[0:2], str_dummy[2:4], str_dummy[4:6]
-
-        datestring = "-".join([year, month, day])
-        startstring = "T".join([datestring, ":".join([hours, minutes, seconds])])
-
-        dtime = np.datetime64(startstring)
-
-        str_dummy = str(data_in.StopTime_UT).zfill(6)
-        hours, minutes, seconds = str_dummy[0:2], str_dummy[2:4], str_dummy[4:6]
-
-        stopstring = "T".join([datestring, ":".join([hours, minutes, seconds])])
-
-        stop = np.datetime64(stopstring)
+        # get metadata expected in StationData but not in data_in's metadata
+        data_out["wavelength_emis"] = data_in["wavelength"]
+        data_out["shots"] = np.float64(data_in["shots"])
+        data_out["zenith_angle"] = np.float64(data_in["zenith_angle"])
+        data_out["filename"] = filename
+        if "Lev02" in filename:
+            data_out["data_level"] = 2
+        loc_split = data_in.attrs["location"].split(", ")
+        data_out["station_name"] = loc_split[0]
+        if len(loc_split) > 1:
+            data_out["country"] = loc_split[1]
+
+        dtime = pd.Timestamp(data_in.measurement_start_datetime).to_numpy().astype("datetime64[s]")
+        stop = pd.Timestamp(data_in.measurement_stop_datetime).to_numpy().astype("datetime64[s]")

        # in case measurement goes over midnight into a new day
        if stop < dtime:
@@ -268,6 +289,7 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
            err_read = False
            unit_ok = False
            outliers_removed = False
+            has_altitude = False

            netcdf_var_name = self.VAR_NAMES_FILE[var]
            # check if the desired variable is in the file
@@ -279,7 +301,7 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
            # xarray.DataArray
            arr = data_in.variables[netcdf_var_name]
            # the actual data as numpy array (or float if 0-D data, e.g. zdust)
-            val = np.float64(arr)
+            val = np.squeeze(np.float64(arr))  # squeeze to 1D array

            # CONVERT UNIT
            unit = None
@@ -308,7 +330,7 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
            if read_err and var in self.ERR_VARNAMES:
                err_name = self.ERR_VARNAMES[var]
                if err_name in data_in.variables:
-                    err = np.float64(data_in.variables[err_name])
+                    err = np.squeeze(np.float64(data_in.variables[err_name]))
                    if unit_ok:
                        err *= unit_fac
                    err_read = True
@@ -342,7 +364,7 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
                wvlg = var_info[var].wavelength_nm
                wvlg_str = self.META_NAMES_FILE["wavelength_emis"]

-                if not wvlg == data_in.attrs[wvlg_str]:
+                if not wvlg == float(data_in[wvlg_str]):
                    self.logger.info("No wavelength match")
                    continue
@@ -359,6 +381,7 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
                        alt_unit = to_alt_unit
                    except Exception as e:
                        self.logger.warning(f"Failed to convert unit: {repr(e)}")
+                has_altitude = True

            # remove outliers from data, if applicable
            if remove_outliers and unit_ok:
@@ -383,13 +406,16 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
                    var_unit=unit,
                    altitude_unit=alt_unit,
                )
+                # Write everything into profile

            data_out[var] = profile

            data_out["var_info"][var].update(
-                unit_ok=unit_ok, err_read=err_read, outliers_removed=outliers_removed
+                unit_ok=unit_ok,
+                err_read=err_read,
+                outliers_removed=outliers_removed,
+                has_altitude=has_altitude,
            )
-
        return data_out

    def read(
@@ -429,6 +455,7 @@ def read(
        UngriddedData data object

        """
+
        if vars_to_retrieve is None:
            vars_to_retrieve = self.DEFAULT_VARS
        elif isinstance(vars_to_retrieve, str):
@@ -442,16 +469,23 @@ def read(
            self.get_file_list(vars_to_retrieve, pattern=pattern)
        files = self.files

+        # make sure files is a list (a single file may have been set as a plain string)
+        if isinstance(files, str):
+            files = [files]
+
        if first_file is None:
            first_file = 0
        if last_file is None:
            last_file = len(files)

-        files = files[first_file:last_file]
+        files = files[
+            first_file : last_file + 1
+        ]  # +1 so that the file at index last_file is included in the subset

        self.read_failed = []

        data_obj = UngriddedData()
+        data_obj.is_vertical_profile = True
        col_idx = data_obj.index
        meta_key = -1.0
        idx = 0
@@ -540,9 +574,15 @@ def read(
                    data_obj.add_chunk(add)

                # write common meta info for this station
-                data_obj._data[idx:stop, col_idx["latitude"]] = stat["latitude"]
-                data_obj._data[idx:stop, col_idx["longitude"]] = stat["longitude"]
-                data_obj._data[idx:stop, col_idx["altitude"]] = stat["altitude"]
+                data_obj._data[idx:stop, col_idx["latitude"]] = stat["station_coords"][
+                    "latitude"
+                ]
+                data_obj._data[idx:stop, col_idx["longitude"]] = stat["station_coords"][
+                    "longitude"
+                ]
+                data_obj._data[idx:stop, col_idx["altitude"]] = stat["station_coords"][
+                    "altitude"
+                ]
                data_obj._data[idx:stop, col_idx["meta"]] = meta_key

                # write data to data object
@@ -649,12 +689,12 @@ def get_file_list(self, vars_to_retrieve=None, pattern=None):
                patterns.append(_pattern)

        matches = []
-        for root, dirnames, files in os.walk(self.data_dir):
+        for root, dirnames, files in os.walk(self.data_dir, topdown=True):
            paths = [os.path.join(root, f) for f in files]
            for _pattern in patterns:
                for path in paths:
                    file = os.path.basename(path)
-                    if not fnmatch.fnmatch(file, _pattern):
+                    if not _pattern in file:
                        continue
                    elif file in exclude:
                        self.excluded_files.append(path)
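The switch from fnmatch globs to plain substring matching above is the key piece that adapts the reader to the new EARLINET Lev02 file naming. A minimal standalone sketch of that matching logic (the file name is borrowed from the updated tests below; this is illustration, not part of the patch):

VAR_PATTERNS_FILE = {
    "ec532aer": "_Lev02_e0532",
    "ec355aer": "_Lev02_e0355",
    "bsc532aer": "_Lev02_b0532",
}

filename = "EARLINET_AerRemSen_cyc_Lev02_e0355_202104262030_202104262130_v01_qc03.nc"

# a variable is considered available in a file iff its pattern occurs in the file name
matching = [var for var, pattern in VAR_PATTERNS_FILE.items() if pattern in filename]
print(matching)  # ['ec355aer']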
diff --git a/pyaerocom/io/readungridded.py b/pyaerocom/io/readungridded.py
index 365fc405d..67c0d1c8a 100755
--- a/pyaerocom/io/readungridded.py
+++ b/pyaerocom/io/readungridded.py
@@ -416,6 +416,12 @@ def read_dataset(
        if filter_post:
            filters = self._eval_filter_post(filter_post, data_id, vars_available)
            data_out = data_out.apply_filters(**filters)
+
+        # Check whether this reader provides VerticalProfile data.
+        # Currently the flag can only apply to a reader as a whole, not to an individual species.
+        if getattr(reader, "is_vertical_profile", None):
+            data_out.is_vertical_profile = reader.is_vertical_profile
+
        return data_out

    def _eval_filter_post(self, filter_post, data_id, vars_available):
@@ -638,15 +644,17 @@ def read(
                    )
                )
            else:
-                data.append(
-                    self.read_dataset(
-                        ds,
-                        vars_to_retrieve,
-                        only_cached=only_cached,
-                        filter_post=filter_post,
-                        **kwargs,
-                    )
+                data_to_append = self.read_dataset(
+                    ds,
+                    vars_to_retrieve,
+                    only_cached=only_cached,
+                    filter_post=filter_post,
+                    **kwargs,
                )
+                data.append(data_to_append)
+                # TODO: test this; UngriddedData can contain more than one variable
+                if getattr(data_to_append, "is_vertical_profile", None):
+                    data.is_vertical_profile = data_to_append.is_vertical_profile

                logger.info(f"Successfully imported {ds} data")
        return data
diff --git a/pyaerocom/stationdata.py b/pyaerocom/stationdata.py
index 7a7c8f2bd..fdf72e3c2 100644
--- a/pyaerocom/stationdata.py
+++ b/pyaerocom/stationdata.py
@@ -25,7 +25,6 @@
from pyaerocom.time_resampler import TimeResampler
from pyaerocom.tstype import TsType
from pyaerocom.units_helpers import convert_unit, get_unit_conversion_fac
-from pyaerocom.vertical_profile import VerticalProfile

logger = logging.getLogger(__name__)
@@ -434,7 +433,7 @@ def get_meta(
                # this has been handled above
                continue
            if self[key] is None and not add_none_vals:
-                logger.info(f"No metadata available for key {key}")
+                logger.debug(f"No metadata available for key {key}")
                continue
            val = self[key]
diff --git a/pyaerocom/ungriddeddata.py b/pyaerocom/ungriddeddata.py
index 4d6ed340c..f07fef615 100644
--- a/pyaerocom/ungriddeddata.py
+++ b/pyaerocom/ungriddeddata.py
@@ -132,6 +132,8 @@ class UngriddedData:

    STANDARD_META_KEYS = STANDARD_META_KEYS

+    ALLOWED_VERT_COORD_TYPES = ["altitude"]
+
    @property
    def _ROWNO(self):
        return self._data.shape[0]
@@ -155,6 +157,7 @@ def __init__(self, num_points=None, add_cols=None):
        self._idx = -1

        self.filter_hist = {}
+        self._is_vertical_profile = False

    def _get_data_revision_helper(self, data_id):
        """
@@ -444,6 +447,21 @@ def has_flag_data(self):
        """Boolean specifying whether this object contains flag data"""
        return (~np.isnan(self._data[:, self._DATAFLAGINDEX])).any()

+    @property
+    def is_vertical_profile(self):
+        """Boolean specifying whether this object contains vertical profile data"""
+        return self._is_vertical_profile
+
+    @is_vertical_profile.setter
+    def is_vertical_profile(self, value):
+        """
+        Boolean specifying whether this object contains vertical profile data.
+
+        Note: this must be set in ReadUngridded, based on the reader, because the
+        reader instance used during reading is not the same instance that is used
+        later in the workflow.
+        """
+        self._is_vertical_profile = value
+
    def copy(self):
        """Make a copy of this object

@@ -1051,7 +1069,6 @@ def _metablock_to_stationdata(
                sd.station_coords[ck] = meta[ck]
            except KeyError:
                pass
-
        # if no input variables are provided, use the ones that are available
        # for this metadata block
        if vars_to_convert is None:
@@ -1714,6 +1731,8 @@ def _find_meta_matches(self, negate=None, *filters):
            if self._check_filter_match(meta, negate, *filters):
                meta_matches.append(meta_idx)
                for var in meta["var_info"]:
+                    if var in self.ALLOWED_VERT_COORD_TYPES:
+                        continue  # altitude is not actually a variable but is stored in var_info like one
                    try:
                        totnum += len(self.meta_idx[meta_idx][var])
                    except KeyError:
@@ -1966,6 +1985,8 @@ def _new_from_meta_blocks(self, meta_indices, totnum_new):
            new.metadata[meta_idx_new] = meta
            new.meta_idx[meta_idx_new] = {}
            for var in meta["var_info"]:
+                if var in self.ALLOWED_VERT_COORD_TYPES:
+                    continue
                indices = self.meta_idx[meta_idx][var]
                totnum = len(indices)
diff --git a/pyaerocom/vert_coords.py b/pyaerocom/vert_coords.py
index d8e78dcd7..41143d885 100644
--- a/pyaerocom/vert_coords.py
+++ b/pyaerocom/vert_coords.py
@@ -215,7 +215,9 @@ class VerticalCoordinate:
    FUNS_YIELD = {"asc": "air_pressure", "ahspc": "air_pressure", "gph": "altitude"}

    _LEV_INCREASES_WITH_ALT = dict(
-        atmosphere_sigma_coordinate=False, atmosphere_hybrid_sigma_pressure_coordinate=False
+        atmosphere_sigma_coordinate=False,
+        atmosphere_hybrid_sigma_pressure_coordinate=False,
+        altitude=True,
    )

    def __init__(self, name=None):
diff --git a/pyaerocom/vertical_profile.py b/pyaerocom/vertical_profile.py
index 67422b298..4c0715ffb 100644
--- a/pyaerocom/vertical_profile.py
+++ b/pyaerocom/vertical_profile.py
@@ -1,14 +1,25 @@
+from typing import Optional
+
import matplotlib.pyplot as plt
import numpy as np
+import numpy.typing as npt

from pyaerocom._lowlevel_helpers import BrowseDict

-# ToDo: complete docstring
class VerticalProfile:
    """Object representing single variable profile data"""

-    def __init__(self, data, altitude, dtime, var_name, data_err, var_unit, altitude_unit):
+    def __init__(
+        self,
+        data: npt.ArrayLike,
+        altitude: npt.ArrayLike,
+        dtime,
+        var_name: str,
+        data_err: Optional[npt.ArrayLike],
+        var_unit: str,
+        altitude_unit: str,
+    ):
        self.var_name = var_name
        self.dtime = dtime
        self.data = data
@@ -19,7 +30,11 @@ def __init__(self, data, altitude, dtime, var_name, data_err, var_unit, altitude
        self.var_info["altitude"] = dict(units=altitude_unit)
        self.var_info[self.var_name] = dict(units=var_unit)

-        assert len(self.data) == len(self.data_err) == len(self.altitude)
+        # Guard against data (and data errors) with missing associated altitude info
+        if hasattr(self.data_err, "__len__"):
+            assert len(self.data) == len(self.data_err) == len(self.altitude)
+        else:
+            assert len(self.data) == len(self.altitude)

    @property
    def data(self):
@@ -66,7 +81,7 @@ def plot(
        figsize=None,
        ax=None,
        **kwargs,
-    ):
+    ):  # pragma: no cover
        """Simple plot method for vertical profile"""
        if figsize is None:
            figsize = (4, 8)
diff --git a/pyaerocom_env.yml b/pyaerocom_env.yml
index 0e9b3a15a..8168be3be 100644
--- a/pyaerocom_env.yml
+++ b/pyaerocom_env.yml
@@ -13,7 +13,6 @@ dependencies:
  - seaborn >=0.8.0
  - dask
  - geonum
-  - numpy
  - simplejson
  - requests
  - reverse-geocode
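The relaxed length assertion in VerticalProfile.__init__ is what allows profiles without uncertainty data: data_err=None has no __len__, so only data and altitude need to align. A short sketch of both accepted cases (all values invented for illustration):

import numpy as np

from pyaerocom.vertical_profile import VerticalProfile

alt = np.array([500.0, 1000.0, 1500.0])  # altitude in m
data = np.array([0.10, 0.05, 0.02])  # extinction in 1/km

# with per-level uncertainties, data, data_err and altitude must have equal lengths
with_err = VerticalProfile(
    data, alt, None, "ec355aer", np.array([0.01, 0.01, 0.02]), "1/km", "m"
)

# without uncertainties (data_err=None), only data and altitude are checked
no_err = VerticalProfile(data, alt, None, "ec355aer", None, "1/km", "m")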
diff --git a/tests/aeroval/test_aeroval_HIGHLEV.py b/tests/aeroval/test_aeroval_HIGHLEV.py
index 8385d7a19..9006f6614 100644
--- a/tests/aeroval/test_aeroval_HIGHLEV.py
+++ b/tests/aeroval/test_aeroval_HIGHLEV.py
@@ -11,7 +11,7 @@

CHK_CFG1 = {
    "map": ["AERONET-Sun-od550aer_Column_TM5-AP3-CTRL-od550aer_2010.json"],
-    "contour": ["od550aer_TM5-AP3-CTRL.geojson", "od550aer_TM5-AP3-CTRL.json"],
+    "contour": 0,
    "hm": ["glob_stats_daily.json", "glob_stats_monthly.json", "glob_stats_yearly.json"],
    "hm/ts": 10,  # number of .json files in sub dir
    "scat": ["AERONET-Sun-od550aer_Column_TM5-AP3-CTRL-od550aer_2010.json"],
@@ -21,17 +21,15 @@

CHK_CFG2 = {
    "map": [
-        "AERONET-Sun-od550aer_Column_TM5-AP3-CTRL-od550aer_2010.json",
        "AERONET-SDA-od550aer_Column_TM5-AP3-CTRL-od550aer_2010.json",
    ],
    "contour": 0,
    "hm": ["glob_stats_monthly.json"],
-    "hm/ts": 21,  # number of .json files in subdir
+    "hm/ts": 9,
    "scat": [
-        "AERONET-Sun-od550aer_Column_TM5-AP3-CTRL-od550aer_2010.json",
        "AERONET-SDA-od550aer_Column_TM5-AP3-CTRL-od550aer_2010.json",
    ],
-    "ts": 40,  # number of .json files in subdir
+    "ts": 17,
    "ts/diurnal": 0,  # number of .json files in subdir
}
diff --git a/tests/aeroval/test_coldatatojson_helpers2.py b/tests/aeroval/test_coldatatojson_helpers2.py
index 563dc0154..9a2c5c638 100644
--- a/tests/aeroval/test_coldatatojson_helpers2.py
+++ b/tests/aeroval/test_coldatatojson_helpers2.py
@@ -18,6 +18,7 @@
    _process_statistics_timeseries,
    get_heatmap_filename,
    get_json_mapname,
+    get_profile_filename,
    get_stationfile_name,
    get_timeseries_file_name,
)
@@ -49,6 +50,11 @@ def test_get_json_mapname():
    assert json == "obs1-var1_Column_mod1-var1_period.json"


+def test_get_profile_filename():
+    json = get_profile_filename("reg1", "obs1", "var1")
+    assert json == "reg1_obs1_var1.json"
+
+
@pytest.mark.parametrize(
    "to_ts_types",
    [
diff --git a/tests/aeroval/test_experiment_output.py b/tests/aeroval/test_experiment_output.py
index 9cf917611..620084ce7 100644
--- a/tests/aeroval/test_experiment_output.py
+++ b/tests/aeroval/test_experiment_output.py
@@ -281,7 +281,7 @@ def test_Experiment_Output_clean_json_files_CFG1_INVALIDMOD(eval_config: dict):
    proc.run()
    del cfg.model_cfg["mod1"]
    modified = proc.exp_output.clean_json_files()
-    assert len(modified) == 15
+    assert len(modified) == 13


@geojson_unavail
diff --git a/tests/fixtures/data_access.py b/tests/fixtures/data_access.py
index 35ad13816..e9ae96cf5 100644
--- a/tests/fixtures/data_access.py
+++ b/tests/fixtures/data_access.py
@@ -23,7 +23,7 @@
TESTDATA_NAME = "testdata-minimal"

#: That's were the testdata can be downloaded from
-TESTDATA_URL = f"https://pyaerocom-ng.met.no/pyaerocom-suppl/{TESTDATA_NAME}.tar.gz.20220707"
+TESTDATA_URL = f"https://pyaerocom-ng.met.no/pyaerocom-suppl/{TESTDATA_NAME}.tar.gz.20230919"

#: Directory where testdata will be downloaded into
TESTDATA_ROOT = Path(const.OUTPUTDIR) / TESTDATA_NAME
@@ -62,6 +62,7 @@ def path(self) -> Path:
    "G.EBAS.hourly.Subset": DataForTests("obsdata/GHOST/data/EBAS/hourly", ReadGhost),
    "EEA_AQeRep.v2.Subset": DataForTests("obsdata/EEA_AQeRep.v2/renamed", io.ReadEEAAQEREP_V2),
    "Earlinet-test": DataForTests("obsdata/Earlinet", io.ReadEarlinet),
+    "Earlinet-test-3d-collocation": DataForTests("obsdata/Earlinet/"),
}
diff --git a/tests/io/test_read_earlinet.py b/tests/io/test_read_earlinet.py
index e2780d1a1..ac7391d9d 100755
--- a/tests/io/test_read_earlinet.py
+++ b/tests/io/test_read_earlinet.py
@@ -10,13 +10,10 @@
from tests.conftest import TEST_RTOL

ROOT: str = const.OBSLOCS_UNGRIDDED["Earlinet-test"]
+
TEST_FILES: list[str] = [
-    f"{ROOT}/ev/ev1008192050.e532",
- f"{ROOT}/ev/ev1009162031.e532", - f"{ROOT}/ev/ev1012131839.e532", - f"{ROOT}/ev/ev1011221924.e532", - f"{ROOT}/ev/ev1105122027.e532", - f"{ROOT}/ms/ms1005242029.e355", + f"{ROOT}/EARLINET_AerRemSen_cyc_Lev02_e0355_202104262030_202104262130_v01_qc03.nc", + f"{ROOT}/EARLINET_AerRemSen_waw_Lev02_b0532_202109221030_202109221130_v01_qc03.nc", ] @@ -28,14 +25,7 @@ def test_all_files_exist(): @pytest.mark.parametrize( "num,vars_to_retrieve", [ - (0, "ec532aer"), - (0, ["ec532aer", "zdust"]), - (0, ReadEarlinet.PROVIDES_VARIABLES), - (1, ReadEarlinet.PROVIDES_VARIABLES), - (2, ReadEarlinet.PROVIDES_VARIABLES), - (3, ReadEarlinet.PROVIDES_VARIABLES), - (4, ReadEarlinet.PROVIDES_VARIABLES), - (5, ReadEarlinet.PROVIDES_VARIABLES), + (0, "ec355aer"), ], ) def test_ReadEarlinet_read_file(num: int, vars_to_retrieve: list[str]): @@ -44,31 +34,31 @@ def test_ReadEarlinet_read_file(num: int, vars_to_retrieve: list[str]): stat = read.read_file(paths[num], vars_to_retrieve) assert "data_level" in stat - assert "wavelength_det" in stat + assert "wavelength_emis" in stat assert "has_zdust" in stat - assert "eval_method" in stat + assert "location" in stat if num != 0: return - assert "ec532aer" in stat.var_info - assert stat.var_info["ec532aer"]["unit_ok"] - assert stat.var_info["ec532aer"]["err_read"] - assert stat.var_info["ec532aer"]["outliers_removed"] + assert "ec355aer" in stat.var_info + assert stat.var_info["ec355aer"]["unit_ok"] + assert stat.var_info["ec355aer"]["err_read"] + assert stat.var_info["ec355aer"]["outliers_removed"] - ec532aer = stat.ec532aer - assert isinstance(ec532aer, VerticalProfile) - assert len(ec532aer.data) == 253 - assert np.sum(np.isnan(ec532aer.data)) == 216 + ec355aer = stat.ec355aer + assert isinstance(ec355aer, VerticalProfile) + assert len(ec355aer.data) == 164 + assert np.sum(np.isnan(ec355aer.data)) == 0 - assert np.nanmean(ec532aer.data) == pytest.approx(4.463068618148296, rel=TEST_RTOL) - assert np.nanstd(ec532aer.data) == pytest.approx(1.8529271228530515, rel=TEST_RTOL) + assert np.nanmean(ec355aer.data) == pytest.approx(0.02495260001522142, rel=TEST_RTOL) + assert np.nanstd(ec355aer.data) == pytest.approx(0.03295176956505217, rel=TEST_RTOL) - assert np.nanmean(ec532aer.data_err) == pytest.approx(4.49097234883772, rel=TEST_RTOL) - assert np.nanstd(ec532aer.data_err) == pytest.approx(0.8332285038985179, rel=TEST_RTOL) + assert np.nanmean(ec355aer.data_err) == pytest.approx(0.003919774151078758, rel=TEST_RTOL) + assert np.nanstd(ec355aer.data_err) == pytest.approx(0.0020847733483625517, rel=TEST_RTOL) - assert np.min(ec532aer.altitude) == pytest.approx(331.29290771484375, rel=TEST_RTOL) - assert np.max(ec532aer.altitude) == pytest.approx(7862.52490234375, rel=TEST_RTOL) + assert np.min(ec355aer.altitude) == pytest.approx(935.4610692253234, rel=TEST_RTOL) + assert np.max(ec355aer.altitude) == pytest.approx(10678.245216562595, rel=TEST_RTOL) @pytest.mark.parametrize( @@ -89,31 +79,35 @@ def test_ReadEarlinet_read_file_error(vars_to_retrieve: str, error: str): def test_ReadEarlinet_read(): read = ReadEarlinet() read.files = TEST_FILES - data = read.read(vars_to_retrieve="ec532aer") + data = read.read(vars_to_retrieve="ec355aer") - assert len(data.metadata) == 5 - assert data.shape == (786, 12) + assert len(data.metadata) == 1 + assert data.shape == (164, 12) - assert np.nanmin(data._data[:, data._DATAINDEX]) == pytest.approx(-0.440742, rel=TEST_RTOL) - assert np.nanmean(data._data[:, data._DATAINDEX]) == pytest.approx(24.793547, rel=TEST_RTOL) - assert 
+    assert np.nanmin(data._data[:, data._DATAINDEX]) == pytest.approx(
+        -0.002188435098876817, rel=TEST_RTOL
+    )
+    assert np.nanmean(data._data[:, data._DATAINDEX]) == pytest.approx(
+        0.02495260001522142, rel=TEST_RTOL
+    )
+    assert np.nanmax(data._data[:, data._DATAINDEX]) == pytest.approx(
+        0.16084047083963124, rel=TEST_RTOL
+    )

-    merged = data.to_station_data("Evora", freq="monthly")
-    assert np.nanmin(merged.ec532aer) == pytest.approx(0.220322, rel=TEST_RTOL)
-    assert np.nanmean(merged.ec532aer) == pytest.approx(23.093238, rel=TEST_RTOL)
-    assert np.nanmax(merged.ec532aer) == pytest.approx(111.478665, rel=TEST_RTOL)
+    merged = data.to_station_data(0)
+    # same values as above because only one meta_idx
+    assert np.nanmin(merged.ec355aer) == pytest.approx(-0.002188435098876817, rel=TEST_RTOL)
+    assert np.nanmean(merged.ec355aer) == pytest.approx(0.02495260001522142, rel=TEST_RTOL)
+    assert np.nanmax(merged.ec355aer) == pytest.approx(0.16084047083963124, rel=TEST_RTOL)


@pytest.mark.parametrize(
    "vars_to_retrieve,pattern,num",
    [
-        (None, None, 5),
+        (None, None, 1),
        (["ec355aer"], None, 1),
-        (["zdust"], None, 6),
        (["bsc355aer"], None, 0),
-        (["bsc532aer"], None, 0),
-        (None, "*ev*", 5),
-        (None, "*xy*", 0),
+        (["bsc532aer"], None, 1),
    ],
)
def test_ReadEarlinet_get_file_list(
@@ -135,4 +129,4 @@ def test_ReadEarlinet__get_exclude_filelist():
    reader = ReadEarlinet("Earlinet-test")
    reader.EXCLUDE_CASES.append("onefile.txt")
    files = reader.get_file_list(reader.PROVIDES_VARIABLES)
-    assert len(files) == 5
+    assert len(files) == 1
diff --git a/tests/test_colocation_3d.py b/tests/test_colocation_3d.py
new file mode 100644
index 000000000..65c206547
--- /dev/null
+++ b/tests/test_colocation_3d.py
@@ -0,0 +1,108 @@
+from __future__ import annotations
+
+import pickle
+
+import iris
+import numpy as np
+import pytest
+
+from pyaerocom import GriddedData
+from pyaerocom.colocation_3d import ColocatedDataLists, colocate_vertical_profile_gridded
+from tests.fixtures.data_access import TEST_DATA
+
+ROOT = TEST_DATA["Earlinet-test-3d-collocation"].path
+
+TEST_FILE = [
+    ROOT / "earlinet_example_for_ci.pkl",
+]
+
+
+@pytest.fixture
+def fake_model_data_with_altitude():
+    longitude = iris.coords.DimCoord(
+        np.linspace(-15, 25, 20), var_name="lon", standard_name="longitude", units="degrees"
+    )
+    latitude = iris.coords.DimCoord(
+        np.linspace(50, 55, 10), var_name="lat", standard_name="latitude", units="degrees"
+    )
+    altitude = iris.coords.DimCoord(
+        np.linspace(0, 60000, 10000), var_name="alt", standard_name="altitude", units="meters"
+    )
+    time = iris.coords.DimCoord(
+        np.arange(18896, 18896 + 7, 1),
+        var_name="time",
+        standard_name="time",
+        units="days since epoch",
+    )
+    dummy = iris.cube.Cube(
+        np.ones((time.shape[0], longitude.shape[0], latitude.shape[0], altitude.shape[0]))
+    )
+
+    latitude.guess_bounds()
+    longitude.guess_bounds()
+    altitude.guess_bounds()
+
+    dummy.add_dim_coord(time, 0)
+    dummy.add_dim_coord(longitude, 1)
+    dummy.add_dim_coord(latitude, 2)
+    dummy.add_dim_coord(altitude, 3)
+
+    dummy.var_name = "bsc532aer"
+
+    data = GriddedData(dummy)
+
+    return data
+
+
+@pytest.fixture
+def example_earlinet_ungriddeddata():
+    path = TEST_FILE[0]
+    assert path.is_file(), f"you should have {path}, update ~/MyPyaerocom/testdata-minimal/"
+    return pickle.load(path.open("rb"))
+
+
+@pytest.mark.parametrize(
"ts_type,resample_how,min_num_obs,use_climatology_ref,colocation_layer_limits,profile_layer_limits", + [ + pytest.param( + "daily", + "mean", + {"monthly": {"daily": 25}}, + False, + [ + {"start": 0, "end": 6000}, + ], + [ + {"start": 0, "end": 6000}, + ], + id="fake_data", + ) + ], +) +def test_colocate_vertical_profile_gridded( + fake_model_data_with_altitude, + example_earlinet_ungriddeddata, + ts_type, + resample_how, + min_num_obs, + use_climatology_ref, + colocation_layer_limits, + profile_layer_limits, +): + colocated_data_list = colocate_vertical_profile_gridded( + data=fake_model_data_with_altitude, + data_ref=example_earlinet_ungriddeddata, + ts_type=ts_type, + resample_how=resample_how, + min_num_obs=min_num_obs, + use_climatology_ref=use_climatology_ref, + colocation_layer_limits=colocation_layer_limits, + profile_layer_limits=profile_layer_limits, + ) + + assert colocated_data_list + assert isinstance(colocated_data_list, ColocatedDataLists) + assert len(colocated_data_list) == 2 # col objs for statistics and viz + assert len(colocated_data_list[0]) == len(colocation_layer_limits) + assert len(colocated_data_list[1]) == len(profile_layer_limits) + assert all("just_for_viz" in obj.metadata for obj in colocated_data_list[1]) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 1ed5fa6f4..135f040b6 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -17,7 +17,7 @@ def test_get_standarad_name(): def test_get_standard_unit(): - assert helpers.get_standard_unit("ec550aer") == "1/Mm" + assert helpers.get_standard_unit("ec550aer") == "1/km" def test_get_lowest_resolution(): diff --git a/tests/test_stationdata.py b/tests/test_stationdata.py index cc216321f..462d490e6 100644 --- a/tests/test_stationdata.py +++ b/tests/test_stationdata.py @@ -51,7 +51,7 @@ def test_StationData_copy(): stat4 = stat2.copy() stat4["longitude"] = "42" -ec_earlinet = get_earlinet_data("ec532aer") +ec_earlinet = get_earlinet_data("ec355aer") def test_StationData_default_vert_grid(): @@ -113,7 +113,7 @@ def test_StationData_check_var_unit_aerocom(): assert stat.get_unit("ec550aer") == "m-1" stat.check_var_unit_aerocom("ec550aer") - assert stat.get_unit("ec550aer") == "1/Mm" + assert stat.get_unit("ec550aer") == "1/km" @pytest.mark.parametrize( @@ -150,7 +150,7 @@ def test_StationData_check_unit(): def test_StationData_check_unit_error(): with pytest.raises(DataUnitError) as e: stat1.check_unit("ec550aer", None) - assert str(e.value) == "Invalid unit m-1 (expected 1/Mm)" + assert str(e.value) == "Invalid unit m-1 (expected 1/km)" def test_StationData_convert_unit(): @@ -328,7 +328,6 @@ def test_StationData_merge_varinfo_error(stat: StationData, other: StationData): [ (stat1, "od550aer", False), (stat2, "od550aer", False), - (ec_earlinet, "ec532aer", True), ], ) def test_StationData_check_if_3d(stat: StationData, var_name: str, result: bool): @@ -389,15 +388,14 @@ def test_StationData_remove_variable_error(): def test_StationData_select_altitude_DataArray(): - selection = ec_earlinet.select_altitude("ec532aer", (1000, 2000)) - assert isinstance(selection, DataArray) - assert selection.shape == (4, 5) - assert list(selection.altitude.values) == [1125, 1375, 1625, 1875] + selection = ec_earlinet.select_altitude("ec355aer", (1000, 2000)) + assert isinstance(selection, DataArray) or isinstance(selection, pd.Series) + assert selection.shape == (16,) def test_StationData_select_altitude_DataArray_error(): with pytest.raises(NotImplementedError) as e: - ec_earlinet.select_altitude("ec532aer", 
+        ec_earlinet.select_altitude("ec355aer", 1000)
    assert str(e.value) == "So far only a range (low, high) is supported for altitude extraction."

@@ -425,7 +423,6 @@ def test_StationData_select_altitude_Series_error(
    "stat,var_name,kwargs",
    [
        (stat1, "od550aer", dict()),
-        (ec_earlinet, "ec532aer", dict(altitude=(0, 1000))),
    ],
)
def test_StationData_to_timeseries(stat: StationData, var_name: str, kwargs: dict):
@@ -433,27 +430,6 @@ def test_StationData_to_timeseries(stat: StationData, var_name: str, kwargs: dic
    series = stat.to_timeseries(var_name, **kwargs)
    assert isinstance(series, pd.Series)


-@pytest.mark.parametrize(
-    "kwargs,error",
-    [
-        pytest.param(
-            dict(),
-            "please specify altitude range via input arg: altitude, e.g. altitude=(100,110)",
-            id="no altitude",
-        ),
-        pytest.param(
-            dict(altitude=(0, 10)),
-            "no data in specified altitude range",
-            id="no data",
-        ),
-    ],
-)
-def test_StationData_to_timeseries_error(kwargs: dict, error: str):
-    with pytest.raises(ValueError) as e:
-        ec_earlinet.to_timeseries("ec532aer", **kwargs)
-    assert str(e.value) == error
-
-
def test_StationData_plot_timeseries():
    ax = stat1.plot_timeseries(var_name="od550aer")
    assert isinstance(ax, Axes)
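Taken together, the reader and data-model changes above are exercised end to end by the updated tests. The following short sketch shows the intended usage; it is not part of the patch, and the "Earlinet-test" registry key and the ec355aer variable choice are borrowed from tests/io/test_read_earlinet.py:

from pyaerocom.io import ReadEarlinet

# read 355 nm extinction profiles from the Lev02 netCDF test files
reader = ReadEarlinet("Earlinet-test")
data = reader.read(vars_to_retrieve="ec355aer")

# the reader flags its output as profile data; ReadUngridded propagates this flag
assert data.is_vertical_profile

# each station holds the variable as a VerticalProfile with aligned data/altitude arrays
stat = data.to_station_data(0)
print(stat.ec355aer.data.shape, stat.ec355aer.altitude.shape)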