Merge pull request #1315 from metno/cloud_filtering_1310
EARLINET cloud filtering
lewisblake authored Aug 20, 2024
2 parents f73e411 + 9e48af9 commit 7c6d8b8
Showing 3 changed files with 74 additions and 19 deletions.
46 changes: 33 additions & 13 deletions pyaerocom/aeroval/coldatatojson_helpers.py
@@ -16,10 +16,17 @@
 from pyaerocom._warnings import ignore_warnings
 from pyaerocom.aeroval.exceptions import ConfigError, TrendsError
 from pyaerocom.aeroval.fairmode_stats import fairmode_stats
-from pyaerocom.aeroval.helpers import _get_min_max_year_periods, _period_str_to_timeslice
+from pyaerocom.aeroval.helpers import (
+    _get_min_max_year_periods,
+    _period_str_to_timeslice,
+)
 from pyaerocom.config import ALL_REGION_NAME
 from pyaerocom.exceptions import DataCoverageError, TemporalResolutionError
-from pyaerocom.region import Region, find_closest_region_coord, get_all_default_region_ids
+from pyaerocom.region import (
+    Region,
+    find_closest_region_coord,
+    get_all_default_region_ids,
+)
 from pyaerocom.region_defs import HTAP_REGIONS_DEFAULT, OLD_AEROCOM_REGIONS
 from pyaerocom.stats.stats import _init_stats_dummy, calculate_statistics
 from pyaerocom.trends_engine import TrendsEngine

@@ -627,7 +634,7 @@ def _make_regional_trends(
             station_obs_trends.append(obs_trend)
         except TrendsError as e:
             msg = f"Failed to calculate trends, and will skip. This was due to {e}"
-            logger.warning(msg)
+            logger.info(msg)

     if len(station_obs_trends) == 0 or len(station_mod_trends) == 0:
         trends_successful = False

@@ -813,7 +820,7 @@ def process_trends(
             trends_successful = True
         except TrendsError as e:
             msg = f"Failed to calculate trends, and will skip. This was due to {e}"
-            logger.warning(msg)
+            logger.info(msg)

         if trends_successful:
             # The whole trends dicts are placed in the stats dict

@@ -1028,7 +1035,7 @@ def _process_map_and_scat(

                 except TrendsError as e:
                     msg = f"Failed to calculate trends, and will skip. This was due to {e}"
-                    logger.warning(msg)
+                    logger.info(msg)

                 perstr = f"{per}-{season}"
                 map_stat[freq][perstr] = stats

@@ -1046,7 +1053,11 @@ def _process_map_and_scat(
                     obs = mod = jsdate = scat_dummy
                 else:
                     obs, mod = obs_vals.tolist(), mod_vals.tolist()
-                scat_data[site][perstr] = {"obs": obs, "mod": mod, "date": jsdate}
+                scat_data[site][perstr] = {
+                    "obs": obs,
+                    "mod": mod,
+                    "date": jsdate,
+                }

     return (map_data, scat_data)
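
For orientation, the reformatted entry above stores, per site and per period-season key, the co-located obs/mod value lists and their dates. A sketch of the resulting structure; the site name, the values, and the millisecond-epoch reading of jsdate are illustrative assumptions, not taken from the diff:

    # Illustrative shape of scat_data after _process_map_and_scat;
    # "Leipzig" and all values are made up for the example.
    scat_data = {
        "Leipzig": {
            "2010-2020-all": {
                "obs": [0.12, 0.08],  # observed values
                "mod": [0.10, 0.09],  # co-located model values
                "date": [1262304000000, 1262390400000],  # assumed ms-since-epoch timestamps
            }
        }
    }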

@@ -1547,13 +1558,17 @@ def process_profile_data_for_regions(
                     subset = per_season_subset.filter_region(
                         region_id=region_id, check_country_meta=use_country
                     )

-                    output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
-                    output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])
+                    with ignore_warnings(
+                        RuntimeWarning,
+                        "Mean of empty slice",
+                        "All-NaN slice encountered",
+                    ):
+                        output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
+                        output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])

                 except (DataCoverageError, TemporalResolutionError) as e:
                     msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}"
-                    logger.warning(msg)
+                    logger.info(msg)

                     output["obs"][freq][perstr] = np.nan
                     output["mod"][freq][perstr] = np.nan

@@ -1614,12 +1629,17 @@ def process_profile_data_for_stations(
                     == station_name,  # in this case a station
                 ]  # Assumes ordering of station name matches

-                output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
-                output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])
+                with ignore_warnings(
+                    RuntimeWarning,
+                    "Mean of empty slice",
+                    "All-NaN slice encountered",
+                ):
+                    output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
+                    output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])

                 except (DataCoverageError, TemporalResolutionError) as e:
                     msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}"
-                    logger.warning(msg)
+                    logger.info(msg)

                     output["obs"][freq][perstr] = np.nan
                     output["mod"][freq][perstr] = np.nan
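
Both hunks above wrap the np.nanmean calls so that the RuntimeWarnings NumPy emits for empty or all-NaN slices no longer flood the log; the NaN result itself is kept and handled downstream. A minimal stand-in for that behaviour using only the standard library (an assumption about what pyaerocom's ignore_warnings does, based on the call signature shown in the diff):

    import warnings

    import numpy as np

    # An all-NaN 2-D slice is normal for sparse regions/periods: np.nanmean
    # returns NaN but also emits RuntimeWarning "Mean of empty slice".
    data = np.full((2, 4, 4), np.nan)

    with warnings.catch_warnings():
        # Suppress only the two messages named in the diff; any other
        # RuntimeWarning still surfaces as usual.
        warnings.filterwarnings("ignore", message="Mean of empty slice", category=RuntimeWarning)
        warnings.filterwarnings("ignore", message="All-NaN slice encountered", category=RuntimeWarning)
        obs_mean = np.nanmean(data[0, :, :])

    print(obs_mean)  # nan, with no warning printed
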
31 changes: 26 additions & 5 deletions pyaerocom/aeroval/experiment_output.py
@@ -8,7 +8,12 @@
 import aerovaldb

 from pyaerocom import const
-from pyaerocom._lowlevel_helpers import DirLoc, StrType, TypeValidator, sort_dict_by_name
+from pyaerocom._lowlevel_helpers import (
+    DirLoc,
+    StrType,
+    TypeValidator,
+    sort_dict_by_name,
+)
 from pyaerocom.aeroval.collections import ObsCollection
 from pyaerocom.aeroval.glob_defaults import (
     VariableInfo,

@@ -32,7 +37,8 @@
 from pyaerocom.variable_helpers import get_aliases

 MapInfo = namedtuple(
-    "MapInfo", ["obs_network", "obs_var", "vert_code", "mod_name", "mod_var", "time_period"]
+    "MapInfo",
+    ["obs_network", "obs_var", "vert_code", "mod_name", "mod_var", "time_period"],
 )

 logger = logging.getLogger(__name__)

@@ -94,9 +100,11 @@ def __init__(self, cfg: EvalSetup):
         self.cfg = cfg
         super().__init__(
             cfg.proj_id,
-            cfg.path_manager.json_basedir
-            if cfg.path_manager.avdb_resource is None
-            else cfg.path_manager.json_basedir,
+            (
+                cfg.path_manager.json_basedir
+                if cfg.path_manager.avdb_resource is None
+                else cfg.path_manager.json_basedir
+            ),
         )

         # dictionary that will be filled by json cleanup methods to check for

@@ -1068,6 +1076,19 @@ def add_profile_entry(
         ):  # only store incremental increases in the layers
             current[model_name]["z"].append(midpoint)

+        # old boilerplate to get around recursive_default_dict issues
+        if "obs" not in current[model_name]:
+            current[model_name]["obs"] = {}
+
+        if freq not in current[model_name]["obs"]:
+            current[model_name]["obs"][freq] = {}
+
+        if "mod" not in current[model_name]:
+            current[model_name]["mod"] = {}
+
+        if freq not in current[model_name]["mod"]:
+            current[model_name]["mod"][freq] = {}
+
         for per in periods:
             for season in seasons:
                 perstr = f"{per}-{season}"
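
The four membership guards added above only pre-create nested keys before they are written to. A compact equivalent, sketched with dict.setdefault under the assumption that current[model_name] behaves like a plain dict at this point:

    # Runnable sketch: each setdefault inserts an empty dict only when the
    # key is missing, mirroring the four "if ... not in ..." guards above.
    current = {"ModelA": {}}
    model_name, freq = "ModelA", "monthly"

    entry = current[model_name]
    entry.setdefault("obs", {}).setdefault(freq, {})
    entry.setdefault("mod", {}).setdefault(freq, {})

    assert current == {"ModelA": {"obs": {"monthly": {}}, "mod": {"monthly": {}}}}
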
16 changes: 15 additions & 1 deletion pyaerocom/io/read_earlinet.py
@@ -24,7 +24,7 @@ class ReadEarlinet(ReadUngriddedBase):
     _FILEMASK = "*.*"

     #: version log of this class (for caching)
-    __version__ = "0.17_" + ReadUngriddedBase.__baseversion__
+    __version__ = "0.18_" + ReadUngriddedBase.__baseversion__

     #: Name of dataset (OBS_ID)
     DATA_ID = const.EARLINET_NAME

@@ -35,6 +35,14 @@
     #: default variables for read method
     DEFAULT_VARS = ["bsc532aer", "ec532aer"]

+    # These are applied to all files. If new cloud filter names are discovered they should be added here.
+    # As of 20.08.24, however, there are still files with less reliable data which get through the filters.
+    # https://github.com/metno/pyaerocom/issues/1310
+    CLOUD_FILTERS = {
+        "cloud_mask_type": 0,  # "no_cloudmask_available manual_cloudmask automatic_cloudmask"
+        "cirrus_contamination": 2,  # "not_available no_cirrus cirrus_detected"
+    }
+
     #: all data values that exceed this number will be set to NaN on read. This
     #: is because iris, xarray, etc. assign a FILL VALUE of the order of e36
     #: to missing data in the netcdf files
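
The integer values in CLOUD_FILTERS index into the netCDF flag-meaning strings quoted in the comments: cloud_mask_type == 0 corresponds to "no_cloudmask_available" and cirrus_contamination == 2 to "cirrus_detected". A small sketch of checking those flags outside the reader, reusing the same xarray calls the diff itself makes; the file name is a placeholder:

    import xarray

    CLOUD_FILTERS = {
        "cloud_mask_type": 0,  # 0 -> "no_cloudmask_available"
        "cirrus_contamination": 2,  # 2 -> "cirrus_detected"
    }

    # "example_profile.nc" is a hypothetical EARLINET file path.
    with xarray.open_dataset("example_profile.nc", engine="netcdf4") as ds:
        for name, bad_value in CLOUD_FILTERS.items():
            # Same check the reader performs: a flag variable present in
            # the file and equal to the flagged value marks it unreliable.
            if name in ds.variables and ds.variables[name].item() == bad_value:
                print(f"{name}={bad_value}: file would be cloud-filtered")
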
@@ -219,6 +227,12 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
         self.logger.debug(f"Reading file {filename}")

         with xarray.open_dataset(filename, engine="netcdf4") as data_in:
+            for filter in self.CLOUD_FILTERS:
+                if filter in data_in.variables:
+                    if data_in.variables[filter].item() == self.CLOUD_FILTERS[filter]:
+                        self.logger.debug(f"Skipping {filename} due to cloud filtering")
+                        continue
+
             # getting the coords since no longer in metadata
             # Put also just in the attributes. not sure why appears twice
             data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64(
