From ccddc03ee55d8abd00daae14be28a25e79837a98 Mon Sep 17 00:00:00 2001 From: jasontempestholt <42639421+jasontempestholt@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:58:44 +0000 Subject: [PATCH] Updates to profile and stratification Passing variables needed for match to grid Remove this version of match to grid as provided in profile Profile sub setting across latitude wrap --- coast/data/profile.py | 35 +++++++--- coast/diagnostics/profile_stratification.py | 75 +-------------------- 2 files changed, 30 insertions(+), 80 deletions(-) diff --git a/coast/data/profile.py b/coast/data/profile.py index 8dfe7353..eed71661 100644 --- a/coast/data/profile.py +++ b/coast/data/profile.py @@ -153,9 +153,19 @@ def subset_indices_lonlat_box(self, lonbounds, latbounds): return: A new profile object containing subsetted data """ - ind = general_utils.subset_indices_lonlat_box( - self.dataset.longitude, self.dataset.latitude, lonbounds[0], lonbounds[1], latbounds[0], latbounds[1] + if lonbounds[0] < lonbounds[1]: + ind = general_utils.subset_indices_lonlat_box( + self.dataset.longitude, self.dataset.latitude, lonbounds[0], lonbounds[1], latbounds[0], latbounds[1] ) + else: + ind1 = general_utils.subset_indices_lonlat_box( + self.dataset.longitude, self.dataset.latitude, lonbounds[0], 180.0 , latbounds[0], latbounds[1] + ) + ind2 = general_utils.subset_indices_lonlat_box( + self.dataset.longitude, self.dataset.latitude, -180.0, lonbounds[1], latbounds[0], latbounds[1] + ) + ind={} + ind[0] = np.concatenate((ind1[0],ind2[0])) return Profile(dataset=self.dataset.isel(id_dim=ind[0])) def extract_en4_profiles(self, dataset_names, region_bounds, chunks: dict = {}): @@ -173,6 +183,7 @@ def extract_en4_profiles(self, dataset_names, region_bounds, chunks: dict = {}): y_max = region_bounds[3] # self.profile = Profile(config=config) self.read_en4(dataset_names, multiple=True, chunks=chunks) + pr = self.subset_indices_lonlat_box(lonbounds=[x_min, x_max], latbounds=[y_min, y_max]) pr = pr.process_en4() return pr @@ -498,7 +509,7 @@ def obs_operator(self, gridded, mask_bottom_level=True): mod_profiles["nearest_index_t"] = (["id_dim"], ind_t.values) return Profile(dataset=mod_profiles) - def match_to_grid(self, gridded, limits=[0, 0, 0, 0], rmax=7.0) -> None: + def match_to_grid(self, gridded, limits=[0, 0, 0, 0], rmax=25.0) -> None: """Match profiles locations to grid, finding 4 nearest neighbours for each profile. Args: @@ -516,13 +527,17 @@ def match_to_grid(self, gridded, limits=[0, 0, 0, 0], rmax=7.0) -> None: """ if sum(limits) != 0: - gridded.subset(ydim=range(limits[0], limits[1] + 0), xdim=range(limits[2], limits[3] + 1)) + gridded.subset(y_dim=range(limits[0], limits[1] + 1), x_dim=range(limits[2], limits[3] + 1)) # keep the grid or subset on the hydrographic profiles object gridded.dataset["limits"] = limits prf = self.dataset grd = gridded.dataset - grd["landmask"] = grd.bottom_level == 0 + if "bottom_level" in grd: + grd["landmask"] = grd.bottom_level == 0 + else: #resort to using bathymetry + grd["landmask"] = grd.bathymetry == 0 + lon_prf = prf["longitude"] lat_prf = prf["latitude"] lon_grd = grd["longitude"] @@ -590,7 +605,7 @@ def match_to_grid(self, gridded, limits=[0, 0, 0, 0], rmax=7.0) -> None: self.dataset["rmin_prf"] = xr.DataArray(rmin_prf, dims=["id_dim", "NNs"]) self.dataset["ind_good"] = xr.DataArray(ind_good, dims=["Ngood"]) - def gridded_to_profile_2d(self, gridded, variable) -> None: + def gridded_to_profile_2d(self, gridded, variable,limits=[0,0,0,0],rmax=25.0) -> None: """ Evaluated a gridded data variable on each profile. Here just 2D, but could be extended to 3 or 4D @@ -605,11 +620,15 @@ def gridded_to_profile_2d(self, gridded, variable) -> None: """ # ensure there are indices in profile if not "ind_x" in self.dataset: - self.match_to_grid(gridded) + self.match_to_grid(gridded,limits=limits,rmax=rmax) # prf = self.dataset grd = gridded.dataset - grd["landmask"] = grd.bottom_level == 0 + if "botton_level" in grd: + grd["landmask"] = grd.bottom_level == 0 + else: # resort to bathymetry for mask + grd["landmask"] = grd.bathymetry == 0 + nprof = self.dataset.id_dim.shape[0] var = np.ma.masked_where(grd["landmask"], grd[variable]) ig = prf.ind_good diff --git a/coast/diagnostics/profile_stratification.py b/coast/diagnostics/profile_stratification.py index 64047f15..6fe0921a 100644 --- a/coast/diagnostics/profile_stratification.py +++ b/coast/diagnostics/profile_stratification.py @@ -39,7 +39,7 @@ def __init__(self, profile: xr.Dataset): self.nz = profile.dataset.dims["z_dim"] debug(f"Initialised {get_slug(self)}") - def clean_data(profile: xr.Dataset, gridded: xr.Dataset, Zmax): + def clean_data(profile: xr.Dataset, gridded: xr.Dataset, Zmax,limits=[0,0,0,0],rmax=25.): """ Cleaning data for stratification metric calculations Stage 1:... @@ -68,7 +68,7 @@ def first_nonzero(arr, axis=0, invalid_val=np.nan): return np.where(mask.any(axis=axis), mask.argmax(axis=axis), invalid_val) if "bathymetry" in gridded.dataset: - profile.gridded_to_profile_2d(gridded, "bathymetry") + profile.gridded_to_profile_2d(gridded, "bathymetry",limits=limits,rmax=rmax) D_prf = profile.dataset.bathymetry.values z = profile.dataset.depth test_surface = z < np.minimum(dz_max, 0.25 * np.repeat(D_prf[:, np.newaxis], n_depth, axis=1)) @@ -143,7 +143,7 @@ def first_nonzero(arr, axis=0, invalid_val=np.nan): # %% return profile - def calc_pea(self, profile: xr.Dataset, gridded: xr.Dataset, Zmax): + def calc_pea(self, profile: xr.Dataset, gridded: xr.Dataset, Zmax, rmax=25.0, limits=[0,0,0,0]): """ Calculates Potential Energy Anomaly @@ -250,72 +250,3 @@ def quick_plot(self, var: xr.DataArray = None): return fig, ax ############################################################################## - def match_to_grid(self, gridded: xr.Dataset, limits: List = [0, 0, 0, 0], rmax=7000.0, grid_name="prf") -> None: - """Match profiles locations to grid, finding 4 nearest neighbours for each profile. - - Args: - gridded (Gridded): Gridded object. - limits (List): [jmin,jmax,imin,imax] - Subset to this region. - rmax (int): 7000 m - maxmimum search distance (metres). - - ### NEED TO DESCRIBE THE OUTPUT. WHAT DO i_prf, j_prf, rmin_prf REPRESENT? - - ### THIS LOOKS LIKE SOMETHING THE profile.obs_operator WOULD DO - """ - - if sum(limits) != 0: - gridded.subset(y_dim=range(limits[0], limits[1] + 1), x_dim=range(limits[2], limits[3] + 1)) - # gridded.spatial _subset(limits) might need this one for wrapping - # keep the bathymetry and mask or subset on the hydrographic profiles object - gridded.dataset["limits"] = limits - - lon_prf = self.dataset.longitude.values - lat_prf = self.dataset.latitude.values - - # Find 4 nearest neighbours on grid - j_prf, i_prf, rmin_prf = gridded.find_j_i_list(lat=lat_prf, lon=lon_prf, n_nn=4) - - self.dataset["i_min"] = limits[0] # reference back to origianl grid - self.dataset["j_min"] = limits[2] - - i_min = self.dataset.i_min.values - j_min = self.dataset.j_min.values - - # Sort 4 NN by distance on grid - ii = np.nonzero(np.isnan(lon_prf)) - i_prf[ii, :] = 0 - j_prf[ii, :] = 0 - ip = np.where(np.logical_or(i_prf[:, 0] != 0, j_prf[:, 0] != 0))[0] - lon_prf4 = np.repeat(lon_prf[ip, np.newaxis], 4, axis=1).ravel() - lat_prf4 = np.repeat(lat_prf[ip, np.newaxis], 4, axis=1).ravel() - r = np.ones(i_prf.shape) * np.nan - lon_grd = gridded.dataset.longitude.values - lat_grd = gridded.dataset.latitude.values - - rr = ProfileStratification.distance_on_grid( - lat_grd, lon_grd, j_prf[ip, :].ravel(), i_prf[ip, :].ravel(), lat_prf4, lon_prf4 - ) - r[ip, :] = np.reshape(rr, (ip.size, 4)) - # sort by distance - ii = np.argsort(r, axis=1) - rmin_prf = np.take_along_axis(r, ii, axis=1) - i_prf = np.take_along_axis(i_prf, ii, axis=1) - j_prf = np.take_along_axis(j_prf, ii, axis=1) - - ii = np.nonzero(np.logical_or(np.min(r, axis=1) > rmax, np.isnan(lon_prf))) - i_prf = i_prf + i_min - j_prf = j_prf + j_min - i_prf[ii, :] = 0 # should the be nan? - j_prf[ii, :] = 0 - - self.dataset[f"i_{grid_name}"] = xr.DataArray(i_prf, dims=["id_dim", "4"]) - self.dataset[f"j_{grid_name}"] = xr.DataArray(j_prf, dims=["id_dim", "4"]) - self.dataset[f"rmin_{grid_name}"] = xr.DataArray(rmin_prf, dims=["id_dim", "4"]) - # self.dataset[f"bathy_{grid_name}"] = gridded.dataset.bathymetry - # self.dataset[f"mask_{grid_name}"] = gridded.dataset.bottom_level != 0 - - def distance_on_grid(Y, X, jpts, ipts, Ypts, Xpts): - DX = (Xpts - X[jpts, ipts]) * earth_radius * np.cos(Ypts * np.pi / 180.0) - DY = (Ypts - Y[jpts, ipts]) * earth_radius - r = np.sqrt(DX**2 + DY**2) - return r