strategy is to create 2D layer time_series in loop
lewisblake committed Jul 13, 2023
1 parent 99beb3d commit 067bb2a
Showing 4 changed files with 55 additions and 12 deletions.
40 changes: 33 additions & 7 deletions pyaerocom/colocation_3d.py
@@ -7,6 +7,7 @@
import numpy as np
import pandas as pd
import xarray as xr
import iris
from geonum.atmosphere import pressure

from pyaerocom import __version__ as pya_ver
@@ -142,13 +143,15 @@ def colocate_vertical_profile_gridded(

latitude = data.latitude.points
longitude = data.longitude.points
altitude = data.altitude.points
lat_range = [np.min(latitude), np.max(latitude)]
lon_range = [np.min(longitude), np.max(longitude)]
alt_range = [np.min(altitude), np.max(altitude)]
# use only sites that are within model domain
breakpoint()

# LB: filter_by_meta wipes is_vertical_profile
data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range)
# Also note that filter_by_meta may not be using alt_range; the function filter_altitude is defined but not used
data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range, altitude=alt_range)
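Since the comment above reports that filter_by_meta wipes is_vertical_profile, one possible workaround is to cache the flag and re-attach it to the filtered object. This is only a sketch against the surrounding variables (data_ref, lat_range, lon_range, alt_range); the getattr default and attribute handling are assumptions, not pyaerocom behaviour.

# hedged sketch: preserve the custom flag across filter_by_meta, which
# returns a new UngriddedData object and so drops it
is_vertical_profile = getattr(data_ref, "is_vertical_profile", False)
data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range, altitude=alt_range)
data_ref.is_vertical_profile = is_vertical_profile  # restore the wiped flag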

# get timeseries from all stations in provided time resolution
# (time resampling is done below in main loop)
@@ -171,11 +174,17 @@
f"Variable {var_ref} is not available in specified time interval ({start}-{stop})"
)

breakpoint() # need to make sure altitude of data comes along. when we get here the model level seems to be missing
# breakpoint() # need to make sure altitude of data comes along. when we get here the model level seems to be missing

# Reports: Inferring surface level in GriddedData based on mean value of ec532aer data in first and last level since CF coordinate info is missing... The level with the largest mean value will be assumed to be the surface. If mean values in both levels

grid_stat_data = data.to_time_series(longitude=ungridded_lons, latitude=ungridded_lats)
# grid_stat_data = data.to_time_series(
# longitude=ungridded_lons,
# latitude=ungridded_lats,
# vert_scheme="profile", # LB: testing this last arg. think needs to be profile
# )

breakpoint()

pd_freq = col_tst.to_pandas_freq()
time_idx = make_datetime_index(start, stop, pd_freq)
@@ -241,8 +250,25 @@ def colocate_vertical_profile_gridded(
# need to be updated, for details (or if errors occur), cf.
# UngriddedData.to_station_data, where the conversion happens)

# LB: maybe need to do something here like
# data_for_grid_stat_data = get_right_subset(data)
# this_layer_data = data.extract(
# iris.Constraint(
# coord_values={
# "altitude": lambda cell: vertical_layer["start"]
# < cell
# < vertical_layer["end"]
# }
# )
# )
# tmp = this_layer_data.grid.aggregated_by("altitude", iris.analysis.MEAN)

# this_layer_data = this_layer_data.  # unfinished draft line (would not parse if left uncommented)
breakpoint()
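The commented draft above outlines the core of the strategy: cut one vertical layer out of the gridded data and reduce it to a 2D field. Below is a stand-alone sketch of that idea with a small synthetic iris cube; it uses Cube.collapsed over altitude rather than the aggregated_by call in the draft, and all names and values are made up.

import iris
import iris.analysis
import numpy as np
from iris.coords import DimCoord
from iris.cube import Cube

alt = DimCoord(np.array([250.0, 750.0, 1250.0, 1750.0]), standard_name="altitude", units="m")
lat = DimCoord(np.array([10.0, 20.0]), standard_name="latitude", units="degrees")
lon = DimCoord(np.array([0.0, 10.0]), standard_name="longitude", units="degrees")
cube = Cube(np.arange(16.0).reshape(4, 2, 2), dim_coords_and_dims=[(alt, 0), (lat, 1), (lon, 2)])

vertical_layer = {"start": 500.0, "end": 1500.0}  # hypothetical layer bounds in metres
layer_constraint = iris.Constraint(
    altitude=lambda cell: vertical_layer["start"] < cell < vertical_layer["end"]
)
layer_cube = cube.extract(layer_constraint)  # keep only the cells inside the layer
layer_2d = layer_cube.collapsed("altitude", iris.analysis.MEAN)  # mean over the layer -> (lat, lon)
print(layer_2d.shape)  # (2, 2)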

# get model station data
grid_stat = grid_stat_data[i]

# LB: want to do the same thing with grid_stat, but need some actual data to see what it looks like
tmp_grid_stat = grid_stat.copy()
tmp_grid_stat[var] = (
@@ -269,12 +295,12 @@

# Make a copy of the station data and resample it to the mean based on hourly resolution. Needs testing!
tmp_obs_stat = obs_stat.copy()
# add the station altitude to the altitudes so that everything is relative to altitude above sea level (ASL)
tmp_obs_stat.altitude += tmp_obs_stat.station_coords["altitude"]

tmp_obs_stat[var_ref] = (
tmp_obs_stat[var_ref][
(vertical_layer["start"] <= tmp_obs_stat.altitude)
(
vertical_layer["start"] <= tmp_obs_stat.altitude
) # altitude data should be given in terms of altitude above sea level
& (tmp_obs_stat.altitude < vertical_layer["end"])
]
.resample(rule="H")
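The selection-plus-resampling pattern applied to tmp_obs_stat above can be shown in isolation. The data below are made up; only the boolean layer mask and the hourly-mean resample mirror the diff.

import numpy as np
import pandas as pd

times = pd.date_range("2023-07-13", periods=6, freq="30min")
altitude = np.array([250.0, 750.0, 1250.0, 250.0, 750.0, 1250.0])  # metres ASL, per sample
profile = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], index=times)

vertical_layer = {"start": 500.0, "end": 1000.0}
in_layer = (vertical_layer["start"] <= altitude) & (altitude < vertical_layer["end"])

# keep only the samples inside the layer, then average per hour
layer_mean = profile[in_layer].resample(rule="H").mean()
print(layer_mean)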
19 changes: 17 additions & 2 deletions pyaerocom/griddeddata.py
@@ -1141,6 +1141,7 @@ def to_time_series(
f"Extracting timeseries data from large array (shape: {self.shape}). "
f"This may take a while..."
)

# if the method makes it to this point, it is 3 or 4 dimensional
# and the first 3 dimensions are time, latitude, longitude.
if self.ndim == 3: # data does not contain vertical dimension
@@ -1162,6 +1163,8 @@

if sample_points is None:
sample_points = self._coords_to_iris_sample_points(**coords)

# LB: we may not want collapse_scalar to be True in this case
return self._to_timeseries_3D(
sample_points,
scheme,
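For reference on the collapse_scalar remark above, a minimal iris example (synthetic cube, hypothetical values): interpolating at scalar sample points with collapse_scalar=True drops the resulting length-1 dimensions, while False keeps them, which matters if the vertical/profile structure should survive the extraction.

import numpy as np
from iris.analysis import Linear
from iris.coords import DimCoord
from iris.cube import Cube

lat = DimCoord(np.linspace(-10.0, 10.0, 5), standard_name="latitude", units="degrees")
lon = DimCoord(np.linspace(0.0, 20.0, 5), standard_name="longitude", units="degrees")
cube = Cube(np.random.rand(5, 5), dim_coords_and_dims=[(lat, 0), (lon, 1)])

sample_points = [("latitude", 2.5), ("longitude", 7.5)]
print(cube.interpolate(sample_points, Linear(), collapse_scalar=True).shape)   # ()
print(cube.interpolate(sample_points, Linear(), collapse_scalar=False).shape)  # (1, 1)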
@@ -1316,8 +1319,18 @@ def _to_timeseries_3D(
# Data contains vertical dimension
data = self._apply_vert_scheme(sample_points, vert_scheme)

# LB: There is a loop here. Presumably the first time to_time_series is called, it hits one of the previous cases for 2D data.
# If not, it comes to this function, which modifies the data so that, when sent back to to_time_series(), it hits one of the 2D cases.
# Instead we need to think about what those 2D cases are doing and how we can mimic them for profiles. The fear is that they must be station data objects,
# in which case maybe it makes sense to create the 2D layer time series in the colocation_3d loop instead.

# ToDo: check if _to_timeseries_2D can be called here
return data.to_time_series(sample_points, scheme, collapse_scalar, add_meta=add_meta)
return data.to_time_series(
sample_points=sample_points,
scheme=scheme,
collapse_scalar=collapse_scalar,
add_meta=add_meta,
)
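The control flow the LB comment describes, where a 3D/4D call is reduced by the vertical scheme and then re-enters to_time_series to hit a 2D branch, can be mocked with a tiny stand-in class (hypothetical, not pyaerocom code):

class _TimeSeriesSketch:
    # stand-in for GriddedData, only to show the dispatch/recursion pattern
    def __init__(self, ndim):
        self.ndim = ndim

    def to_time_series(self):
        if self.ndim == 3:  # 2D-in-space branch: extract station time series directly
            return "handled by one of the 2D cases"
        return self._to_timeseries_3d()

    def _to_timeseries_3d(self):
        reduced = self._apply_vert_scheme()  # e.g. collapse the vertical dimension
        return reduced.to_time_series()      # recurse; now hits the 2D branch

    def _apply_vert_scheme(self):
        return _TimeSeriesSketch(ndim=self.ndim - 1)


print(_TimeSeriesSketch(ndim=4).to_time_series())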

def _apply_vert_scheme(self, sample_points, vert_scheme):
"""Helper method that checks and infers vertical scheme for time
@@ -1349,7 +1362,9 @@ def _apply_vert_scheme(self, sample_points, vert_scheme):
"Cannot yet retrieve timeseries at altitude levels. Coming soon..."
)
elif vert_scheme == "profile":
raise NotImplementedError("Cannot yet retrieve profile timeseries")
# raise NotImplementedError("Cannot yet retrieve profile timeseries")
breakpoint()
return self
else:
try:
# check if vertical scheme can be converted into valid iris
6 changes: 4 additions & 2 deletions pyaerocom/io/read_earlinet.py
@@ -171,7 +171,7 @@ def __init__(self, data_id=None, data_dir=None):
#: files that were actually excluded from reading
self.excluded_files = []

#Lb: testing putting attr here
# Lb: testing putting attr here
self.is_vertical_profile = True

def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outliers=True):
@@ -245,7 +245,9 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli
data_in["latitude"].values
)
data_out["altitude"] = np.float64(
data_in["altitude"].values
data_in[
"altitude"
].values  # altitude is defined in EARLINET in terms of altitude above sea level
) # Note altitude is an array for the data, station altitude is different
data_out["station_coords"]["altitude"] = np.float64(data_in.station_altitude)

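A toy illustration of the distinction the comments above draw between the per-sample altitude array (above sea level) and the scalar station altitude; variable and attribute names mirror read_file, but the values and file layout are made up.

import numpy as np
import xarray as xr

data_in = xr.Dataset(
    {"altitude": ("level", np.array([120.0, 180.0, 240.0]))},  # metres ASL, one value per sample
    attrs={"station_altitude": 100.0},                         # single station altitude
)

data_out = {"station_coords": {}}
data_out["altitude"] = np.asarray(data_in["altitude"].values, dtype=np.float64)
data_out["station_coords"]["altitude"] = np.float64(data_in.attrs["station_altitude"])
print(data_out)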
2 changes: 1 addition & 1 deletion pyaerocom/ungriddeddata.py
@@ -1728,7 +1728,7 @@ def _find_meta_matches(self, negate=None, *filters):
# or find out why altitude is not included like var is
for var in meta["var_info"]:
if var == "altitude":
continue
continue # altitude is not actually a variable but is stored in var_info like one
try:
totnum += len(self.meta_idx[meta_idx][var])
except KeyError:
