diff --git a/pyaerocom/aeroval/coldatatojson_engine.py b/pyaerocom/aeroval/coldatatojson_engine.py index 803020957..f7253cd16 100644 --- a/pyaerocom/aeroval/coldatatojson_engine.py +++ b/pyaerocom/aeroval/coldatatojson_engine.py @@ -234,6 +234,8 @@ def process_coldata(self, coldata: ColocatedData): station_names=coldata.data.station_name.values, periods=periods, seasons=seasons, + obs_name=obs_name, + var_name_web=var_name_web, ) logger.info( @@ -276,6 +278,8 @@ def _process_profile_data_for_visualization( station_names: ArrayLike = None, periods: tuple[str, ...] = None, seasons: tuple[str, ...] = None, + obs_name: str = None, + var_name_web: str = None, ): if region_names is None and station_names is None: raise ValueError("Both region_id and station_name can not both be None") @@ -289,8 +293,16 @@ def _process_profile_data_for_visualization( periods=periods, seasons=seasons, ) - - self.exp_output.add_profile_entry(data, profile_viz, periods, seasons) + location = region_names[regid] + self.exp_output.add_profile_entry( + data, + profile_viz, + periods, + seasons, + location=location, + network=obs_name, + obsvar=var_name_web, + ) # Loop through stations for station_name in station_names: @@ -302,7 +314,15 @@ def _process_profile_data_for_visualization( seasons=seasons, ) - self.exp_output.add_profile_entry(data, profile_viz, periods, seasons) + self.exp_output.add_profile_entry( + data, + profile_viz, + periods, + seasons, + location=station_name, + network=obs_name, + obsvar=var_name_web, + ) def _process_stats_timeseries_for_all_regions( self, diff --git a/pyaerocom/aeroval/coldatatojson_helpers.py b/pyaerocom/aeroval/coldatatojson_helpers.py index cb908888a..31cd4a11b 100644 --- a/pyaerocom/aeroval/coldatatojson_helpers.py +++ b/pyaerocom/aeroval/coldatatojson_helpers.py @@ -16,10 +16,17 @@ from pyaerocom._warnings import ignore_warnings from pyaerocom.aeroval.exceptions import ConfigError, TrendsError from pyaerocom.aeroval.fairmode_stats import fairmode_stats -from pyaerocom.aeroval.helpers import _get_min_max_year_periods, _period_str_to_timeslice +from pyaerocom.aeroval.helpers import ( + _get_min_max_year_periods, + _period_str_to_timeslice, +) from pyaerocom.config import ALL_REGION_NAME from pyaerocom.exceptions import DataCoverageError, TemporalResolutionError -from pyaerocom.region import Region, find_closest_region_coord, get_all_default_region_ids +from pyaerocom.region import ( + Region, + find_closest_region_coord, + get_all_default_region_ids, +) from pyaerocom.region_defs import HTAP_REGIONS_DEFAULT, OLD_AEROCOM_REGIONS from pyaerocom.stats.stats import _init_stats_dummy, calculate_statistics from pyaerocom.trends_engine import TrendsEngine @@ -627,7 +634,7 @@ def _make_regional_trends( station_obs_trends.append(obs_trend) except TrendsError as e: msg = f"Failed to calculate trends, and will skip. This was due to {e}" - logger.warning(msg) + logger.info(msg) if len(station_obs_trends) == 0 or len(station_mod_trends) == 0: trends_successful = False @@ -813,7 +820,7 @@ def process_trends( trends_successful = True except TrendsError as e: msg = f"Failed to calculate trends, and will skip. This was due to {e}" - logger.warning(msg) + logger.info(msg) if trends_successful: # The whole trends dicts are placed in the stats dict @@ -1028,7 +1035,7 @@ def _process_map_and_scat( except TrendsError as e: msg = f"Failed to calculate trends, and will skip. This was due to {e}" - logger.warning(msg) + logger.info(msg) perstr = f"{per}-{season}" map_stat[freq][perstr] = stats @@ -1046,7 +1053,11 @@ def _process_map_and_scat( obs = mod = jsdate = scat_dummy else: obs, mod = obs_vals.tolist(), mod_vals.tolist() - scat_data[site][perstr] = {"obs": obs, "mod": mod, "date": jsdate} + scat_data[site][perstr] = { + "obs": obs, + "mod": mod, + "date": jsdate, + } return (map_data, scat_data) @@ -1547,13 +1558,17 @@ def process_profile_data_for_regions( subset = per_season_subset.filter_region( region_id=region_id, check_country_meta=use_country ) - - output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :]) - output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :]) + with ignore_warnings( + RuntimeWarning, + "Mean of empty slice", + "All-NaN slice encountered", + ): + output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :]) + output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :]) except (DataCoverageError, TemporalResolutionError) as e: msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}" - logger.warning(msg) + logger.info(msg) output["obs"][freq][perstr] = np.nan output["mod"][freq][perstr] = np.nan @@ -1614,12 +1629,17 @@ def process_profile_data_for_stations( == station_name, # in this case a station ] # Assumes ordering of station name matches - output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :]) - output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :]) + with ignore_warnings( + RuntimeWarning, + "Mean of empty slice", + "All-NaN slice encountered", + ): + output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :]) + output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :]) except (DataCoverageError, TemporalResolutionError) as e: msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}" - logger.warning(msg) + logger.info(msg) output["obs"][freq][perstr] = np.nan output["mod"][freq][perstr] = np.nan diff --git a/pyaerocom/aeroval/experiment_output.py b/pyaerocom/aeroval/experiment_output.py index d00b0db4a..f157549a3 100644 --- a/pyaerocom/aeroval/experiment_output.py +++ b/pyaerocom/aeroval/experiment_output.py @@ -8,7 +8,12 @@ import aerovaldb from pyaerocom import const -from pyaerocom._lowlevel_helpers import DirLoc, StrType, TypeValidator, sort_dict_by_name +from pyaerocom._lowlevel_helpers import ( + DirLoc, + StrType, + TypeValidator, + sort_dict_by_name, +) from pyaerocom.aeroval.collections import ObsCollection from pyaerocom.aeroval.glob_defaults import ( VariableInfo, @@ -32,7 +37,8 @@ from pyaerocom.variable_helpers import get_aliases MapInfo = namedtuple( - "MapInfo", ["obs_network", "obs_var", "vert_code", "mod_name", "mod_var", "time_period"] + "MapInfo", + ["obs_network", "obs_var", "vert_code", "mod_name", "mod_var", "time_period"], ) logger = logging.getLogger(__name__) @@ -94,9 +100,11 @@ def __init__(self, cfg: EvalSetup): self.cfg = cfg super().__init__( cfg.proj_id, - cfg.path_manager.json_basedir - if cfg.path_manager.avdb_resource is None - else cfg.path_manager.json_basedir, + ( + cfg.path_manager.json_basedir + if cfg.path_manager.avdb_resource is None + else cfg.path_manager.json_basedir + ), ) # dictionary that will be filled by json cleanup methods to check for @@ -1030,7 +1038,14 @@ def write_timeseries(self, data): ) def add_profile_entry( - self, data: ColocatedData, profile_viz: dict, periods: list[str], seasons: list[str] + self, + data: ColocatedData, + profile_viz: dict, + periods: list[str], + seasons: list[str], + location, + network, + obsvar, ): """Adds an entry for the colocated data to profiles.json. @@ -1041,7 +1056,9 @@ def add_profile_entry( seasons (list[str]): seasons to compute over (e.g., All, DJF, etc.) """ with self.avdb.lock(): - current = self.avdb.get_profiles(self.proj_id, self.exp_id, default={}) + current = self.avdb.get_profiles( + self.proj_id, self.exp_id, location, network, obsvar, default={} + ) current = recursive_defaultdict(current) for freq, coldata in data.items(): @@ -1059,6 +1076,19 @@ def add_profile_entry( ): # only store incremental increases in the layers current[model_name]["z"].append(midpoint) + # old boilerplate to get around recursive_default_dict issues + if "obs" not in current[model_name]: + current[model_name]["obs"] = {} + + if freq not in current[model_name]["obs"]: + current[model_name]["obs"][freq] = {} + + if "mod" not in current[model_name]: + current[model_name]["mod"] = {} + + if freq not in current[model_name]["mod"]: + current[model_name]["mod"][freq] = {} + for per in periods: for season in seasons: perstr = f"{per}-{season}" @@ -1084,4 +1114,4 @@ def add_profile_entry( } current[model_name] = round_floats(current[model_name]) - self.avdb.put_profiles(current, self.proj_id, self.exp_id) + self.avdb.put_profiles(current, self.proj_id, self.exp_id, location, network, obsvar) diff --git a/pyaerocom/io/read_earlinet.py b/pyaerocom/io/read_earlinet.py index bba5f4d28..b75288e2e 100755 --- a/pyaerocom/io/read_earlinet.py +++ b/pyaerocom/io/read_earlinet.py @@ -24,7 +24,7 @@ class ReadEarlinet(ReadUngriddedBase): _FILEMASK = "*.*" #: version log of this class (for caching) - __version__ = "0.17_" + ReadUngriddedBase.__baseversion__ + __version__ = "0.18_" + ReadUngriddedBase.__baseversion__ #: Name of dataset (OBS_ID) DATA_ID = const.EARLINET_NAME @@ -35,6 +35,14 @@ class ReadEarlinet(ReadUngriddedBase): #: default variables for read method DEFAULT_VARS = ["bsc532aer", "ec532aer"] + # These are applied to all files. If new cloud filter names are discovered they should be added here. + # As of 20.08.24, however, there are still files with less reliable data which get through the filters. + # https://github.com/metno/pyaerocom/issues/1310 + CLOUD_FILTERS = { + "cloud_mask_type": 0, # "no_cloudmask_available manual_cloudmask automatic_cloudmask" + "cirrus_contamination": 2, # "not_available no_cirrus cirrus_detected" + } + #: all data values that exceed this number will be set to NaN on read. This #: is because iris, xarray, etc. assign a FILL VALUE of the order of e36 #: to missing data in the netcdf files @@ -219,6 +227,12 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli self.logger.debug(f"Reading file {filename}") with xarray.open_dataset(filename, engine="netcdf4") as data_in: + for filter in self.CLOUD_FILTERS: + if filter in data_in.variables: + if data_in.variables[filter].item() == self.CLOUD_FILTERS[filter]: + self.logger.debug(f"Skipping {filename} due to cloud filtering") + continue + # getting the coords since no longer in metadata # Put also just in the attributes. not sure why appears twice data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64( diff --git a/pyaerocom_env.yml b/pyaerocom_env.yml index 8f04fe416..e1915bf86 100644 --- a/pyaerocom_env.yml +++ b/pyaerocom_env.yml @@ -18,7 +18,7 @@ dependencies: - tqdm - openpyxl - typer >=0.7.0 - - pydantic > 2.0.0 + - pydantic >= 2.7.1 - pyproj >= 3.0.0 - pooch >=1.7.0 - psutil >= 5.0.0 diff --git a/pyproject.toml b/pyproject.toml index 6543d0733..d2d346a82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ ] requires-python = ">=3.10" dependencies = [ - "aerovaldb == 0.0.15", + "aerovaldb==0.0.17", "scitools-iris>=3.8.1", "xarray>=2022.12.0", "cartopy>=0.21.1",