From 067bb2ac324b53c559666d9c0c659b8cdff27d25 Mon Sep 17 00:00:00 2001 From: lewisblake Date: Thu, 13 Jul 2023 13:38:30 +0000 Subject: [PATCH] strategy is to create 2D layer time_series in loop --- pyaerocom/colocation_3d.py | 40 +++++++++++++++++++++++++++++------ pyaerocom/griddeddata.py | 19 +++++++++++++++-- pyaerocom/io/read_earlinet.py | 6 ++++-- pyaerocom/ungriddeddata.py | 2 +- 4 files changed, 55 insertions(+), 12 deletions(-) diff --git a/pyaerocom/colocation_3d.py b/pyaerocom/colocation_3d.py index f1e394e98..5397f86ac 100644 --- a/pyaerocom/colocation_3d.py +++ b/pyaerocom/colocation_3d.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd import xarray as xr +import iris from geonum.atmosphere import pressure from pyaerocom import __version__ as pya_ver @@ -142,13 +143,15 @@ def colocate_vertical_profile_gridded( latitude = data.latitude.points longitude = data.longitude.points + altitude = data.altitude.points lat_range = [np.min(latitude), np.max(latitude)] lon_range = [np.min(longitude), np.max(longitude)] + alt_range = [np.min(altitude), np.max(altitude)] # use only sites that are within model domain - breakpoint() # LB: filter_by_meta wipes is_vertical_profile - data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range) + # Also note that filter_by_meta may not be calling alt_range. Function fitler_altitude is defined but not used + data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range, altitude=alt_range) # get timeseries from all stations in provided time resolution # (time resampling is done below in main loop) @@ -171,11 +174,17 @@ def colocate_vertical_profile_gridded( f"Variable {var_ref} is not available in specified time interval ({start}-{stop})" ) - breakpoint() # need to make sure altitude of data comes along. when we get here the model level seems to be missing + # breakpoint() # need to make sure altitude of data comes along. when we get here the model level seems to be missing # Reports: Inferring surface level in GriddedData based on mean value of ec532aer data in first and last level since CF coordinate info is missing... The level with the largest mean value will be assumed to be the surface. If mean values in both levels - grid_stat_data = data.to_time_series(longitude=ungridded_lons, latitude=ungridded_lats) + # grid_stat_data = data.to_time_series( + # longitude=ungridded_lons, + # latitude=ungridded_lats, + # vert_scheme="profile", # LB: testing this last arg. think needs to be profile + # ) + + breakpoint() pd_freq = col_tst.to_pandas_freq() time_idx = make_datetime_index(start, stop, pd_freq) @@ -241,8 +250,25 @@ def colocate_vertical_profile_gridded( # need to be updated, for details (or if errors occur), cf. # UngriddedData.to_station_data, where the conversion happens) + # LB: maybe need to do something here like + # data_for_grid_stat_data = get_right_subset(data) + # this_layer_data = data.extract( + # iris.Constraint( + # coord_values={ + # "altitude": lambda cell: vertical_layer["start"] + # < cell + # < vertical_layer["end"] + # } + # ) + # ) + # tmp = this_layer_data.grid.aggregated_by("altitude", iris.analysis.MEAN) + + this_layer_data = this_layer_data. + breakpoint() + # get model station data grid_stat = grid_stat_data[i] + # LB: want to do the same thing with grid_stat, but need some actual data to see what it looks like tmp_grid_stat = grid_stat.copy() tmp_grid_stat[var] = ( @@ -269,12 +295,12 @@ def colocate_vertical_profile_gridded( # Make a copy of the station data and resample it to the mean based on hourly resolution. Needs testing! tmp_obs_stat = obs_stat.copy() - # add the station altitude to the altitudes so everything is against Above Sea Level (ASL) - tmp_obs_stat.altitude += tmp_obs_stat.station_coords["altitude"] tmp_obs_stat[var_ref] = ( tmp_obs_stat[var_ref][ - (vertical_layer["start"] <= tmp_obs_stat.altitude) + ( + vertical_layer["start"] <= tmp_obs_stat.altitude + ) # altitude data should be given in terms of altitude above sea level & (tmp_obs_stat.altitude < vertical_layer["end"]) ] .resample(rule="H") diff --git a/pyaerocom/griddeddata.py b/pyaerocom/griddeddata.py index da48f9a73..aa39071bc 100644 --- a/pyaerocom/griddeddata.py +++ b/pyaerocom/griddeddata.py @@ -1141,6 +1141,7 @@ def to_time_series( f"Extracting timeseries data from large array (shape: {self.shape}). " f"This may take a while..." ) + # if the method makes it to this point, it is 3 or 4 dimensional # and the first 3 dimensions are time, latitude, longitude. if self.ndim == 3: # data does not contain vertical dimension @@ -1162,6 +1163,8 @@ def to_time_series( if sample_points is None: sample_points = self._coords_to_iris_sample_points(**coords) + + # LB: collapse_scalar might not want to be true in this case return self._to_timeseries_3D( sample_points, scheme, @@ -1316,8 +1319,18 @@ def _to_timeseries_3D( # Data contains vertical dimension data = self._apply_vert_scheme(sample_points, vert_scheme) + # LB: There is a loop here. Presumably the first time to_time_series is called, it hits one of the previous cases for 2D data + # If not, it comes to this function, which modifies it in a way that when sent back to to_time_series(), it then will hit one of the 2D cases + # In stead we need to think about what those 2d cases are doing and how we can mimic it to profiles. Fear they must be station data objects in which + # case maybe it makes sense in the collocation_3d loop to + # ToDo: check if _to_timeseries_2D can be called here - return data.to_time_series(sample_points, scheme, collapse_scalar, add_meta=add_meta) + return data.to_time_series( + sample_points=sample_points, + scheme=scheme, + collapse_scalar=collapse_scalar, + add_meta=add_meta, + ) def _apply_vert_scheme(self, sample_points, vert_scheme): """Helper method that checks and infers vertical scheme for time @@ -1349,7 +1362,9 @@ def _apply_vert_scheme(self, sample_points, vert_scheme): "Cannot yet retrieve timeseries at altitude levels. Coming soon..." ) elif vert_scheme == "profile": - raise NotImplementedError("Cannot yet retrieve profile timeseries") + # raise NotImplementedError("Cannot yet retrieve profile timeseries") + breakpoint() + return self else: try: # check if vertical scheme can be converted into valid iris diff --git a/pyaerocom/io/read_earlinet.py b/pyaerocom/io/read_earlinet.py index 6671b796c..43fded43d 100755 --- a/pyaerocom/io/read_earlinet.py +++ b/pyaerocom/io/read_earlinet.py @@ -171,7 +171,7 @@ def __init__(self, data_id=None, data_dir=None): #: files that were actually excluded from reading self.excluded_files = [] - #Lb: testing putting attr here + # Lb: testing putting attr here self.is_vertical_profile = True def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outliers=True): @@ -245,7 +245,9 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli data_in["latitude"].values ) data_out["altitude"] = np.float64( - data_in["altitude"].values + data_in[ + "altitude" + ].values # altitude is defined in EARLINET in terms- of altitude above sea level ) # Note altitude is an array for the data, station altitude is different data_out["station_coords"]["altitude"] = np.float64(data_in.station_altitude) diff --git a/pyaerocom/ungriddeddata.py b/pyaerocom/ungriddeddata.py index 2ffeee443..544e42ff8 100644 --- a/pyaerocom/ungriddeddata.py +++ b/pyaerocom/ungriddeddata.py @@ -1728,7 +1728,7 @@ def _find_meta_matches(self, negate=None, *filters): # or find out why altitude is not included like var is for var in meta["var_info"]: if var == "altitude": - continue + continue # altitude is not actually a variable but is stored in var_info like one try: totnum += len(self.meta_idx[meta_idx][var]) except KeyError: