Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EARLINET cloud filtering #1315

Merged
merged 5 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions pyaerocom/aeroval/coldatatojson_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@
station_names=coldata.data.station_name.values,
periods=periods,
seasons=seasons,
obs_name=obs_name,
var_name_web=var_name_web,
)

logger.info(
Expand Down Expand Up @@ -276,6 +278,8 @@
station_names: ArrayLike = None,
periods: tuple[str, ...] = None,
seasons: tuple[str, ...] = None,
obs_name: str = None,
var_name_web: str = None,
):
if region_names is None and station_names is None:
raise ValueError("Both region_id and station_name can not both be None")
Expand All @@ -289,8 +293,16 @@
periods=periods,
seasons=seasons,
)

self.exp_output.add_profile_entry(data, profile_viz, periods, seasons)
location = region_names[regid]
self.exp_output.add_profile_entry(

Check warning on line 297 in pyaerocom/aeroval/coldatatojson_engine.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/coldatatojson_engine.py#L296-L297

Added lines #L296 - L297 were not covered by tests
data,
profile_viz,
periods,
seasons,
location=location,
network=obs_name,
obsvar=var_name_web,
)

# Loop through stations
for station_name in station_names:
Expand All @@ -302,7 +314,15 @@
seasons=seasons,
)

self.exp_output.add_profile_entry(data, profile_viz, periods, seasons)
self.exp_output.add_profile_entry(

Check warning on line 317 in pyaerocom/aeroval/coldatatojson_engine.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/coldatatojson_engine.py#L317

Added line #L317 was not covered by tests
data,
profile_viz,
periods,
seasons,
location=station_name,
network=obs_name,
obsvar=var_name_web,
)

def _process_stats_timeseries_for_all_regions(
self,
Expand Down
46 changes: 33 additions & 13 deletions pyaerocom/aeroval/coldatatojson_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,17 @@
from pyaerocom._warnings import ignore_warnings
from pyaerocom.aeroval.exceptions import ConfigError, TrendsError
from pyaerocom.aeroval.fairmode_stats import fairmode_stats
from pyaerocom.aeroval.helpers import _get_min_max_year_periods, _period_str_to_timeslice
from pyaerocom.aeroval.helpers import (
_get_min_max_year_periods,
_period_str_to_timeslice,
)
from pyaerocom.config import ALL_REGION_NAME
from pyaerocom.exceptions import DataCoverageError, TemporalResolutionError
from pyaerocom.region import Region, find_closest_region_coord, get_all_default_region_ids
from pyaerocom.region import (
Region,
find_closest_region_coord,
get_all_default_region_ids,
)
from pyaerocom.region_defs import HTAP_REGIONS_DEFAULT, OLD_AEROCOM_REGIONS
from pyaerocom.stats.stats import _init_stats_dummy, calculate_statistics
from pyaerocom.trends_engine import TrendsEngine
Expand Down Expand Up @@ -627,7 +634,7 @@
station_obs_trends.append(obs_trend)
except TrendsError as e:
msg = f"Failed to calculate trends, and will skip. This was due to {e}"
logger.warning(msg)
logger.info(msg)

Check warning on line 637 in pyaerocom/aeroval/coldatatojson_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/coldatatojson_helpers.py#L637

Added line #L637 was not covered by tests

if len(station_obs_trends) == 0 or len(station_mod_trends) == 0:
trends_successful = False
Expand Down Expand Up @@ -813,7 +820,7 @@
trends_successful = True
except TrendsError as e:
msg = f"Failed to calculate trends, and will skip. This was due to {e}"
logger.warning(msg)
logger.info(msg)

Check warning on line 823 in pyaerocom/aeroval/coldatatojson_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/coldatatojson_helpers.py#L823

Added line #L823 was not covered by tests

if trends_successful:
# The whole trends dicts are placed in the stats dict
Expand Down Expand Up @@ -1028,7 +1035,7 @@

except TrendsError as e:
msg = f"Failed to calculate trends, and will skip. This was due to {e}"
logger.warning(msg)
logger.info(msg)

Check warning on line 1038 in pyaerocom/aeroval/coldatatojson_helpers.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/coldatatojson_helpers.py#L1038

Added line #L1038 was not covered by tests

perstr = f"{per}-{season}"
map_stat[freq][perstr] = stats
Expand All @@ -1046,7 +1053,11 @@
obs = mod = jsdate = scat_dummy
else:
obs, mod = obs_vals.tolist(), mod_vals.tolist()
scat_data[site][perstr] = {"obs": obs, "mod": mod, "date": jsdate}
scat_data[site][perstr] = {
"obs": obs,
"mod": mod,
"date": jsdate,
}

return (map_data, scat_data)

Expand Down Expand Up @@ -1547,13 +1558,17 @@
subset = per_season_subset.filter_region(
region_id=region_id, check_country_meta=use_country
)

output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])
with ignore_warnings(
RuntimeWarning,
"Mean of empty slice",
"All-NaN slice encountered",
):
output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])

except (DataCoverageError, TemporalResolutionError) as e:
msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}"
logger.warning(msg)
logger.info(msg)

output["obs"][freq][perstr] = np.nan
output["mod"][freq][perstr] = np.nan
Expand Down Expand Up @@ -1614,12 +1629,17 @@
== station_name, # in this case a station
] # Assumes ordering of station name matches

output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])
with ignore_warnings(
RuntimeWarning,
"Mean of empty slice",
"All-NaN slice encountered",
):
output["obs"][freq][perstr] = np.nanmean(subset.data[0, :, :])
output["mod"][freq][perstr] = np.nanmean(subset.data[1, :, :])

except (DataCoverageError, TemporalResolutionError) as e:
msg = f"Failed to access subset timeseries, and will skip. Reason was: {e}"
logger.warning(msg)
logger.info(msg)

output["obs"][freq][perstr] = np.nan
output["mod"][freq][perstr] = np.nan
Expand Down
46 changes: 38 additions & 8 deletions pyaerocom/aeroval/experiment_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
import aerovaldb

from pyaerocom import const
from pyaerocom._lowlevel_helpers import DirLoc, StrType, TypeValidator, sort_dict_by_name
from pyaerocom._lowlevel_helpers import (
DirLoc,
StrType,
TypeValidator,
sort_dict_by_name,
)
from pyaerocom.aeroval.collections import ObsCollection
from pyaerocom.aeroval.glob_defaults import (
VariableInfo,
Expand All @@ -32,7 +37,8 @@
from pyaerocom.variable_helpers import get_aliases

MapInfo = namedtuple(
"MapInfo", ["obs_network", "obs_var", "vert_code", "mod_name", "mod_var", "time_period"]
"MapInfo",
["obs_network", "obs_var", "vert_code", "mod_name", "mod_var", "time_period"],
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -94,9 +100,11 @@
self.cfg = cfg
super().__init__(
cfg.proj_id,
cfg.path_manager.json_basedir
if cfg.path_manager.avdb_resource is None
else cfg.path_manager.json_basedir,
(
cfg.path_manager.json_basedir
if cfg.path_manager.avdb_resource is None
else cfg.path_manager.json_basedir
),
)

# dictionary that will be filled by json cleanup methods to check for
Expand Down Expand Up @@ -1030,7 +1038,14 @@
)

def add_profile_entry(
self, data: ColocatedData, profile_viz: dict, periods: list[str], seasons: list[str]
self,
data: ColocatedData,
profile_viz: dict,
periods: list[str],
seasons: list[str],
location,
network,
obsvar,
):
"""Adds an entry for the colocated data to profiles.json.

Expand All @@ -1041,7 +1056,9 @@
seasons (list[str]): seasons to compute over (e.g., All, DJF, etc.)
"""
with self.avdb.lock():
current = self.avdb.get_profiles(self.proj_id, self.exp_id, default={})
current = self.avdb.get_profiles(

Check warning on line 1059 in pyaerocom/aeroval/experiment_output.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/experiment_output.py#L1059

Added line #L1059 was not covered by tests
self.proj_id, self.exp_id, location, network, obsvar, default={}
)
current = recursive_defaultdict(current)

for freq, coldata in data.items():
Expand All @@ -1059,6 +1076,19 @@
): # only store incremental increases in the layers
current[model_name]["z"].append(midpoint)

# old boilerplate to get around recursive_default_dict issues
if "obs" not in current[model_name]:
current[model_name]["obs"] = {}

Check warning on line 1081 in pyaerocom/aeroval/experiment_output.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/experiment_output.py#L1080-L1081

Added lines #L1080 - L1081 were not covered by tests

if freq not in current[model_name]["obs"]:
current[model_name]["obs"][freq] = {}

Check warning on line 1084 in pyaerocom/aeroval/experiment_output.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/experiment_output.py#L1083-L1084

Added lines #L1083 - L1084 were not covered by tests

if "mod" not in current[model_name]:
current[model_name]["mod"] = {}

Check warning on line 1087 in pyaerocom/aeroval/experiment_output.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/experiment_output.py#L1086-L1087

Added lines #L1086 - L1087 were not covered by tests

if freq not in current[model_name]["mod"]:
current[model_name]["mod"][freq] = {}

Check warning on line 1090 in pyaerocom/aeroval/experiment_output.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/experiment_output.py#L1089-L1090

Added lines #L1089 - L1090 were not covered by tests

for per in periods:
for season in seasons:
perstr = f"{per}-{season}"
Expand All @@ -1084,4 +1114,4 @@
}
current[model_name] = round_floats(current[model_name])

self.avdb.put_profiles(current, self.proj_id, self.exp_id)
self.avdb.put_profiles(current, self.proj_id, self.exp_id, location, network, obsvar)

Check warning on line 1117 in pyaerocom/aeroval/experiment_output.py

View check run for this annotation

Codecov / codecov/patch

pyaerocom/aeroval/experiment_output.py#L1117

Added line #L1117 was not covered by tests
16 changes: 15 additions & 1 deletion pyaerocom/io/read_earlinet.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ReadEarlinet(ReadUngriddedBase):
_FILEMASK = "*.*"

#: version log of this class (for caching)
__version__ = "0.17_" + ReadUngriddedBase.__baseversion__
__version__ = "0.18_" + ReadUngriddedBase.__baseversion__

#: Name of dataset (OBS_ID)
DATA_ID = const.EARLINET_NAME
Expand All @@ -35,6 +35,14 @@ class ReadEarlinet(ReadUngriddedBase):
#: default variables for read method
DEFAULT_VARS = ["bsc532aer", "ec532aer"]

# These are applied to all files. If new cloud filter names are discovered they should be added here.
# As of 20.08.24, however, there are still files with less reliable data which get through the filters.
# https://github.com/metno/pyaerocom/issues/1310
CLOUD_FILTERS = {
"cloud_mask_type": 0, # "no_cloudmask_available manual_cloudmask automatic_cloudmask"
"cirrus_contamination": 2, # "not_available no_cirrus cirrus_detected"
}

#: all data values that exceed this number will be set to NaN on read. This
#: is because iris, xarray, etc. assign a FILL VALUE of the order of e36
#: to missing data in the netcdf files
Expand Down Expand Up @@ -219,6 +227,12 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
self.logger.debug(f"Reading file {filename}")

with xarray.open_dataset(filename, engine="netcdf4") as data_in:
for filter in self.CLOUD_FILTERS:
if filter in data_in.variables:
if data_in.variables[filter].item() == self.CLOUD_FILTERS[filter]:
self.logger.debug(f"Skipping {filename} due to cloud filtering")
continue

# getting the coords since no longer in metadata
# Put also just in the attributes. not sure why appears twice
data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64(
Expand Down
2 changes: 1 addition & 1 deletion pyaerocom_env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ dependencies:
- tqdm
- openpyxl
- typer >=0.7.0
- pydantic > 2.0.0
- pydantic >= 2.7.1
- pyproj >= 3.0.0
- pooch >=1.7.0
- psutil >= 5.0.0
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
]
requires-python = ">=3.10"
dependencies = [
"aerovaldb == 0.0.15",
"aerovaldb==0.0.17",
"scitools-iris>=3.8.1",
"xarray>=2022.12.0",
"cartopy>=0.21.1",
Expand Down