diff --git a/dfm_tools/xugrid_helpers.py b/dfm_tools/xugrid_helpers.py
index 507972de..0650880a 100644
--- a/dfm_tools/xugrid_helpers.py
+++ b/dfm_tools/xugrid_helpers.py
@@ -279,101 +279,48 @@ def open_partitioned_dataset(file_nc:str, decode_fillvals:bool = False, remove_e
 
 def open_dataset_curvilinear(file_nc,
-                             varn_lon='longitude',
-                             varn_lat='latitude',
-                             varn_vert_lon='vertices_longitude', #'grid_x'
-                             varn_vert_lat='vertices_latitude', #'grid_y'
-                             ij_dims=['i','j'], #['N','M']
+                             x_dim='i', # N
+                             y_dim='j', # M
+                             x_bounds='vertices_longitude', # grid_x
+                             y_bounds='vertices_latitude', # grid_y
                              convert_360to180=False,
                              **kwargs):
     """
-    This is a first version of a function that creates a xugrid UgridDataset from a curvilinear dataset like CMCC. Curvilinear means in this case 2D lat/lon variables and i/j indexing. The CMCC dataset does contain vertices, which is essential for conversion to ugrid.
-    It also works for WAQUA files that are converted with getdata
+    Construct a UgridDataset from a curvilinear grid, i.e. 2D lat/lon variables
+    with i/j dimensions plus vertices variables that define the cell corners.
+    Works for curvilinear datasets like CMCC and for WAQUA files converted with getdata.
     """
-    # TODO: maybe get varn_lon/varn_lat automatically with cf-xarray (https://github.com/xarray-contrib/cf-xarray)
     if 'chunks' not in kwargs:
         kwargs['chunks'] = {'time':1}
     
-    # data_vars='minimal' to avoid time dimension on vertices_latitude and others when opening multiple files at once
-    ds = xr.open_mfdataset(file_nc, data_vars="minimal", **kwargs)
-    
-    print('>> getting vertices from ds: ',end='')
+    # data_vars='minimal' to avoid a time dimension on vertices_latitude and
+    # others when opening multiple files at once
+    print('>> open_mfdataset: ',end='')
     dtstart = dt.datetime.now()
-    vertices_longitude = ds.variables[varn_vert_lon].to_numpy()
-    vertices_longitude = vertices_longitude.reshape(-1,vertices_longitude.shape[-1])
-    vertices_latitude = ds.variables[varn_vert_lat].to_numpy()
-    vertices_latitude = vertices_latitude.reshape(-1,vertices_latitude.shape[-1])
+    ds = xr.open_mfdataset(file_nc, data_vars="minimal", **kwargs)
     print(f'{(dt.datetime.now()-dtstart).total_seconds():.2f} sec')
-    
+    # convert longitudes from the 0 to 360 range to -180 to 180
    if convert_360to180:
-        vertices_longitude = (vertices_longitude+180) % 360 - 180
-    
-    # face_xy = np.stack([longitude,latitude],axis=-1)
-    # face_coords_x, face_coords_y = face_xy.T
-    #a,b = np.unique(face_xy,axis=0,return_index=True) #TODO: there are non_unique face_xy values, inconvenient
-    face_xy_vertices = np.stack([vertices_longitude,vertices_latitude],axis=-1)
-    face_xy_vertices_flat = face_xy_vertices.reshape(-1,2)
-    uniq,inv = np.unique(face_xy_vertices_flat, axis=0, return_inverse=True)
-    #len(uniq) = 104926 >> amount of unique node coords
-    #uniq.max() = 359.9654541015625 >> node_coords_xy
-    #len(inv) = 422816 >> is length of face_xy_vertices.reshape(-1,2)
-    #inv.max() = 104925 >> node numbers
-    node_coords_x, node_coords_y = uniq.T
-    
-    face_node_connectivity = inv.reshape(face_xy_vertices.shape[:2]) #fnc.max() = 104925
-    
-    #remove all faces that have only 1 unique node (does not result in a valid grid) #TODO: not used yet except for print
-    fnc_all_duplicates = (face_node_connectivity.T==face_node_connectivity[:,0]).all(axis=0)
-    
-    #create bool of cells with duplicate nodes (some have 1 unique node, some 3, all these are dropped) #TODO: support also triangles?
-    fnc_closed = np.c_[face_node_connectivity,face_node_connectivity[:,0]]
-    fnc_has_duplicates = (np.diff(fnc_closed,axis=1)==0).any(axis=1)
-    
-    #only keep cells that have 4 unique nodes
-    bool_combined = ~fnc_has_duplicates
-    print(f'WARNING: dropping {fnc_has_duplicates.sum()} faces with duplicate nodes ({fnc_all_duplicates.sum()} with one unique node)')
-    face_node_connectivity = face_node_connectivity[bool_combined]
-    
-    grid = xu.Ugrid2d(node_x=node_coords_x,
-                      node_y=node_coords_y,
-                      face_node_connectivity=face_node_connectivity,
-                      fill_value=-1,
-                      )
-    
-    print('>> stacking ds i/j coordinates: ',end='') #fast
-    dtstart = dt.datetime.now()
-    face_dim = grid.face_dimension
-    # TODO: lev/time bnds are dropped, avoid this. maybe stack initial dataset since it would also simplify the rest of the function a bit
-    ds_stacked = ds.stack({face_dim:ij_dims}).sel({face_dim:bool_combined})
-    latlon_vars = [varn_lon, varn_lat, varn_vert_lon, varn_vert_lat]
-    ds_stacked = ds_stacked.drop_vars(ij_dims + latlon_vars + [face_dim])
-    print(f'{(dt.datetime.now()-dtstart).total_seconds():.2f} sec')
+        ds[x_bounds] = (ds[x_bounds]+180) % 360 - 180
     
-    print('>> init uds: ',end='') #long
+    topology = {"mesh2d":{"x":x_dim,
+                          "y":y_dim,
+                          "x_bounds":x_bounds,
+                          "y_bounds":y_bounds,
+                          }
+                }
+    
+    print('>> convert to xugrid.UgridDataset: ',end='')
     dtstart = dt.datetime.now()
-    uds = xu.UgridDataset(ds_stacked,grids=[grid])
+    uds = xu.UgridDataset.from_structured2d(ds, topology=topology)
     print(f'{(dt.datetime.now()-dtstart).total_seconds():.2f} sec')
-    
-    # drop 0-area cells (relevant for CMCC global datasets)
-    bool_zero_cell_size = uds.grid.area==0
-    if bool_zero_cell_size.any():
-        print(f"WARNING: dropping {bool_zero_cell_size.sum()} 0-sized cells from dataset")
-        uds = uds.isel({uds.grid.face_dimension: ~bool_zero_cell_size})
-    
-    #remove faces that link to node coordinates that are nan (occurs in waqua models)
-    bool_faces_wnannodes = np.isnan(uds.grid.face_node_coordinates[:,:,0]).any(axis=1)
-    if bool_faces_wnannodes.any():
-        print(f'>> drop {bool_faces_wnannodes.sum()} faces with nan nodecoordinates from uds: ',end='') #long
-        dtstart = dt.datetime.now()
-        uds = uds.sel({face_dim:~bool_faces_wnannodes})
-        print(f'{(dt.datetime.now()-dtstart).total_seconds():.2f} sec')
-    
     return uds
 
 
-def get_delft3d4_nanmask(x,y):
+def delft3d4_get_nanmask(x,y):
+    # -999.999 in kivu and 0.0 in curvedbend, both in westernscheldt
     bool_0 = (x==0) & (y==0)
     bool_1 = (x==-999) & (y==-999)
     bool_2 = (x==-999.999) & (y==-999.999)
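For orientation, here is a minimal sketch (not part of the diff, untested) of the conversion the slimmed-down function now delegates to xugrid: a tiny synthetic 2x3 curvilinear grid using the same variable and dimension names as the CMCC defaults above, passed through xu.UgridDataset.from_structured2d (available from xugrid 0.12.3, see the pyproject.toml pin below).

import numpy as np
import xarray as xr
import xugrid as xu

# synthetic 2x3 curvilinear grid; names mirror the CMCC defaults above
nj, ni = 2, 3
jj, ii = np.meshgrid(np.arange(nj), np.arange(ni), indexing="ij")
lon = ii.astype(float)  # hypothetical cell-center longitudes
lat = jj.astype(float)  # hypothetical cell-center latitudes
# four corner coordinates per cell, counterclockwise: ll, lr, ur, ul
lon_b = np.stack([lon - 0.5, lon + 0.5, lon + 0.5, lon - 0.5], axis=-1)
lat_b = np.stack([lat - 0.5, lat - 0.5, lat + 0.5, lat + 0.5], axis=-1)
ds = xr.Dataset(
    data_vars={"thetao": (("j", "i"), np.arange(nj * ni, dtype=float).reshape(nj, ni))},
    coords={
        "longitude": (("j", "i"), lon),
        "latitude": (("j", "i"), lat),
        "vertices_longitude": (("j", "i", "vertices"), lon_b),
        "vertices_latitude": (("j", "i", "vertices"), lat_b),
    },
)
topology = {"mesh2d": {"x": "i", "y": "j",
                       "x_bounds": "vertices_longitude",
                       "y_bounds": "vertices_latitude"}}
uds = xu.UgridDataset.from_structured2d(ds, topology=topology)
print(uds.grid.n_face)  # expect 6 faces for the 2x3 grid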
@@ -381,6 +328,18 @@ def get_delft3d4_nanmask(x,y):
     return bool_mask
 
 
+def delft3d4_stack_shifted_coords(da):
+    shift = 1
+    np_stacked = np.stack([
+        da, #ll
+        da.shift(MC=shift), #lr
+        da.shift(MC=shift, NC=shift), #ur
+        da.shift(NC=shift), #ul
+        ],axis=-1)
+    da_stacked = xr.DataArray(np_stacked, dims=("M","N","four"))
+    return da_stacked
+
+
 def open_dataset_delft3d4(file_nc, **kwargs):
     if 'chunks' not in kwargs:
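A standalone illustration (not from the diff) of what the new helper computes: shifting the corner array along MC/NC and stacking gathers, for every face, its four surrounding corners in the ll/lr/ur/ul order of the comments above; the shifted-in first row/column becomes nan.

import numpy as np
import xarray as xr

xcor = xr.DataArray(np.arange(9.0).reshape(3, 3), dims=("MC", "NC"))
stacked = np.stack(
    [xcor,                    # ll
     xcor.shift(MC=1),        # lr
     xcor.shift(MC=1, NC=1),  # ur
     xcor.shift(NC=1)],       # ul
    axis=-1,
)
print(stacked[1, 1])  # corners gathered around face (1, 1): [4. 1. 0. 3.]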
@@ -388,104 +347,60 @@
     ds = xr.open_dataset(file_nc, **kwargs)
     
+    xcor_stacked = delft3d4_stack_shifted_coords(ds.XCOR)
+    ycor_stacked = delft3d4_stack_shifted_coords(ds.YCOR)
+    mask_xy = delft3d4_get_nanmask(xcor_stacked,ycor_stacked)
+    ds['xcor_stacked'] = xcor_stacked.where(~mask_xy)
+    ds['ycor_stacked'] = ycor_stacked.where(~mask_xy)
+    
     if ('U1' in ds.data_vars) and ('V1' in ds.data_vars):
-        #mask u and v separately with 0 to avoid high velocities (cannot be nan, since (nan+value)/2= nan instead of value=2
+        # replace invalid values with zero instead of nan, otherwise the
+        # centering below would shrink the spatial coverage
         mask_u1 = (ds.U1==-999) | (ds.U1==-999.999)
         mask_v1 = (ds.V1==-999) | (ds.V1==-999.999)
-        u1_mn = ds.U1.where(~mask_u1,0)
-        v1_mn = ds.V1.where(~mask_v1,0)
+        u1_mn = ds.U1.where(~mask_u1, 0)
+        v1_mn = ds.V1.where(~mask_v1, 0)
+        
+        # minus 0.5 since padding=low, so a corner value is representative
+        # for the previous face; by that logic, method='nearest' might be
+        # better (or just rename the dims)
+        u1_mn_cen = u1_mn.interp(MC=u1_mn.MC-0.5, method='linear').rename({'MC':'M'})
+        v1_mn_cen = v1_mn.interp(NC=v1_mn.NC-0.5, method='linear').rename({'NC':'N'})
+        # TODO: since padding=low, just renaming the dims might even be better?
+        # u1_mn_cen = u1_mn.rename({'MC':'M'})
+        # v1_mn_cen = v1_mn.rename({'NC':'N'})
+        # >> could also be done with `ds = ds.swap_dims({"M":"MC","N":"NC"})`
         
-        #create combined uv mask (have to rename dimensions)
+        # create combined uv mask (have to rename dimensions)
         mask_u1_mn = mask_u1.rename({'MC':'M'})
         mask_v1_mn = mask_v1.rename({'NC':'N'})
         mask_uv1_mn = mask_u1_mn & mask_v1_mn
-        
-        #average U1/V1 values to M/N
-        u1_mn = (u1_mn + u1_mn.shift(MC=1))/2 #TODO: or MC=-1
-        u1_mn = u1_mn.rename({'MC':'M'})
-        u1_mn = u1_mn.where(~mask_uv1_mn,np.nan) #replace temporary zeros with nan
-        v1_mn = (v1_mn + v1_mn.shift(NC=1))/2 #TODO: or NC=-1
-        v1_mn = v1_mn.rename({'NC':'N'})
-        v1_mn = v1_mn.where(~mask_uv1_mn,np.nan) #replace temporary zeros with nan
-        ds = ds.drop_vars(['U1','V1']) #to avoid creating large chunks, alternative is to overwrite the vars with the MN-averaged vars, but it requires passing and updating of attrs
+        # drop all actually missing cells
+        u1_mn_cen = u1_mn_cen.where(~mask_uv1_mn)
+        v1_mn_cen = v1_mn_cen.where(~mask_uv1_mn)
+        
+        # to avoid creating large chunks; the alternative is to overwrite the
+        # vars with the MN-averaged vars, but that requires passing and
+        # updating of attrs
+        ds = ds.drop_vars(['U1','V1'])
         
-        #compute ux/uy/umag/udir #TODO: add attrs to variables
+        # compute ux/uy/umag/udir
+        # TODO: add attrs to variables
         alfas_rad = np.deg2rad(ds.ALFAS)
-        vel_x = u1_mn*np.cos(alfas_rad) - v1_mn*np.sin(alfas_rad)
-        vel_y = u1_mn*np.sin(alfas_rad) + v1_mn*np.cos(alfas_rad)
+        vel_x = u1_mn_cen*np.cos(alfas_rad) - v1_mn_cen*np.sin(alfas_rad)
+        vel_y = u1_mn_cen*np.sin(alfas_rad) + v1_mn_cen*np.cos(alfas_rad)
         ds['ux'] = vel_x
         ds['uy'] = vel_y
         ds['umag'] = np.sqrt(vel_x**2 + vel_y**2)
         ds['udir'] = np.rad2deg(np.arctan2(vel_y, vel_x))%360
     
-    mn_slice = slice(1,None)
-    ds = ds.isel(M=mn_slice,N=mn_slice) #cut off first values of M/N (centers), since they are fillvalues and should have different size than MC/NC (corners)
-    
-    #find and set nans in XCOR/YCOR arrays
-    mask_xy = get_delft3d4_nanmask(ds.XCOR,ds.YCOR) #-999.999 in kivu and 0.0 in curvedbend, both in westernscheldt
-    ds['XCOR'] = ds.XCOR.where(~mask_xy)
-    ds['YCOR'] = ds.YCOR.where(~mask_xy)
-    
-    #convert to ugrid
-    node_coords_x = ds.XCOR.to_numpy().ravel()
-    node_coords_y = ds.YCOR.to_numpy().ravel()
-    xcor_shape = ds.XCOR.shape
-    xcor_nvals = xcor_shape[0] * xcor_shape[1]
-    
-    #remove weird outlier values in kivu model
-    node_coords_x[node_coords_x<-1000] = np.nan
-    node_coords_y[node_coords_y<-1000] = np.nan
-    
-    #find nodes with nan coords
-    if not (np.isnan(node_coords_x) == np.isnan(node_coords_y)).all():
-        raise Exception('node_coords_xy do not have nans in same location')
-    nan_nodes_bool = np.isnan(node_coords_x)
-    node_coords_x = node_coords_x[~nan_nodes_bool]
-    node_coords_y = node_coords_y[~nan_nodes_bool]
-    
-    node_idx_square = -np.ones(xcor_nvals,dtype=int)
-    node_idx_nonans = np.arange((~nan_nodes_bool).sum())
-    node_idx_square[~nan_nodes_bool] = node_idx_nonans
-    node_idx = node_idx_square.reshape(xcor_shape)
-    face_node_connectivity = np.stack([node_idx[1:,:-1].ravel(), #ll
-                                       node_idx[1:,1:].ravel(), #lr
-                                       node_idx[:-1,1:].ravel(), #ur
-                                       node_idx[:-1,:-1].ravel(), #ul
-                                       ],axis=1)
-    
-    keep_faces_bool = (face_node_connectivity!=-1).sum(axis=1)==4
-    
-    face_node_connectivity = face_node_connectivity[keep_faces_bool]
-    
-    grid = xu.Ugrid2d(node_x=node_coords_x,
-                      node_y=node_coords_y,
-                      face_node_connectivity=face_node_connectivity,
-                      fill_value=-1,
-                      )
-    
-    face_dim = grid.face_dimension
-    ds_stacked = ds.stack({face_dim:('M','N')}).sel({face_dim:keep_faces_bool})
-    ds_stacked = ds_stacked.drop_vars(['M','N','mesh2d_nFaces'])
-    uds = xu.UgridDataset(ds_stacked,grids=[grid])
-    
-    uds = uds.drop_vars(['XCOR','YCOR','grid'])
-    uds = uds.drop_dims(['MC','NC']) #clean up dataset by dropping corner dims (drops also variabes with U/V masks and U/V/C bedlevel)
-    
-    # convert to xarray.Dataset to update/remove attrs
-    ds_temp = uds.ugrid.to_dataset()
-    
-    # set vertical dimensions attr
-    # TODO: would be more convenient to do within xu.Ugrid2d(): https://github.com/Deltares/xugrid/issues/195#issuecomment-2111841390
-    grid_attrs = {"vertical_dimensions": ds.grid.attrs["vertical_dimensions"]}
-    ds_temp["mesh2d"] = ds_temp["mesh2d"].assign_attrs(grid_attrs)
-    
-    # drop attrs pointing to the removed grid variable (topology is now in mesh2d)
-    # TODO: this is not possible on the xu.UgridDataset directly
-    for varn in ds_temp.data_vars:
-        if "grid" in ds_temp[varn].attrs.keys():
-            del ds_temp[varn].attrs["grid"]
-    
-    uds = xu.UgridDataset(ds_temp)
+    # TODO: consider using the same dims for variables on cell corners and faces
+    # ds = ds.swap_dims({"M":"MC","N":"NC"})
+    topology = {"mesh2d":{"x":"M",
+                          "y":"N",
+                          "x_bounds":"xcor_stacked",
+                          "y_bounds":"ycor_stacked",
+                          }
+                }
+    uds = xu.UgridDataset.from_structured2d(ds, topology=topology)
     
     return uds
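Two of the replacements above can be sanity-checked with made-up numbers (illustrative sketch only; xarray's interp needs scipy): the interp at MC-0.5 reproduces the removed two-point average, and the ALFAS rotation turns grid-aligned (u, v) into east/north components.

import numpy as np
import xarray as xr

# centering: linear interpolation at MC-0.5 equals the old (u[m-1]+u[m])/2
u1 = xr.DataArray([0.0, 2.0, 6.0], dims="MC", coords={"MC": [0, 1, 2]})
print(u1.interp(MC=u1.MC - 0.5, method="linear").values)  # [nan  1.  4.]
print(((u1 + u1.shift(MC=1)) / 2).values)                 # [nan  1.  4.]

# rotation: u=1, v=0 with ALFAS=90 degrees gives flow due north
alfas_rad = np.deg2rad(90.0)
u, v = 1.0, 0.0
vel_x = u * np.cos(alfas_rad) - v * np.sin(alfas_rad)  # ~0.0
vel_y = u * np.sin(alfas_rad) + v * np.cos(alfas_rad)  # 1.0
print(np.rad2deg(np.arctan2(vel_y, vel_x)) % 360)      # udir: 90.0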
diff --git a/pyproject.toml b/pyproject.toml
index d3d7f74d..85f2fd88 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,8 +36,8 @@ dependencies = [
     "netcdf4>=1.5.4",
     #bottleneck>=1.3.3 successfully pip installs in py39
     "bottleneck>=1.3.3",
-    #xugrid>=0.12.1 fixes warnings, properly closes datasets and adds from_structured method
-    "xugrid>=0.12.2",
+    #xugrid>=0.12.3 adds from_structured2d
+    "xugrid>=0.12.3",
     #cdsapi>=0.7.5 resolves many deprecationwarnings
     "cdsapi>=0.7.5",
     #pydap>=3.4.0 is the first with support for newer python versions
diff --git a/tests/examples/postprocess_CMCC_plotting.py b/tests/examples/postprocess_CMCC_plotting.py
index 1358ee96..694fdf10 100644
--- a/tests/examples/postprocess_CMCC_plotting.py
+++ b/tests/examples/postprocess_CMCC_plotting.py
@@ -10,13 +10,21 @@ import dfm_tools as dfmt
 
-for varn in ['uo', 'vo','thetao']:
+for varn in ['uo', 'vo', 'thetao']:
     #TODO: for vo, solve "RuntimeWarning: divide by zero encountered in divide"
     file_nc = f'p:\\archivedprojects\\11206304-futuremares-rawdata-preps\\data\\CMIP6_BC\\CMCC-ESM2\\{varn}_Omon_CMCC-ESM2_ssp126_r1i1p1f1_gn_201501-203412.nc'
-    uds = dfmt.open_dataset_curvilinear(file_nc, convert_360to180=True) #TODO: check TODO in this function for improvements #TODO: plot_coastlines gives wrong result when axis from 0-360
+    uds = dfmt.open_dataset_curvilinear(
+        file_nc,
+        x_dim='i',
+        y_dim='j',
+        x_bounds='vertices_longitude',
+        y_bounds='vertices_latitude',
+        convert_360to180=True,
+        )
     uds = uds.ugrid.to_nonperiodic(xmax=180)
-    uds = uds.ugrid.sel(x=slice(-15,30), y=slice(30,70)) #slice to europe (arbitrary, but to visualy compare grids)
+    # slice to Europe (arbitrary, but useful to visually compare grids)
+    uds = uds.ugrid.sel(x=slice(-15,30), y=slice(30,70))
     
     fig, ax = plt.subplots()
     uds[varn].isel(time=0,lev=0).ugrid.plot(center=False)
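A quick worked check (illustrative values) of the convert_360to180 wrap used in the example above: it maps 0-360 longitudes onto the -180 to 180 range, with 180 itself wrapping to -180.

import numpy as np

lon = np.array([0.0, 90.0, 180.0, 350.0])
print((lon + 180) % 360 - 180)  # [   0.   90. -180.  -10.]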
diff --git a/tests/examples/postprocess_waqua_netcdf_convertedwith_getdata.py b/tests/examples/postprocess_waqua_netcdf_convertedwith_getdata.py
index 314d81a0..c4307663 100644
--- a/tests/examples/postprocess_waqua_netcdf_convertedwith_getdata.py
+++ b/tests/examples/postprocess_waqua_netcdf_convertedwith_getdata.py
@@ -65,10 +65,13 @@ figsize = (16,7)
 
 #MAP RMM
-uds = dfmt.open_dataset_curvilinear(file_nc_map,
-                                    varn_lon='XZETA', varn_lat='YZETA',
-                                    varn_vert_lon='grid_x', varn_vert_lat='grid_y',
-                                    ij_dims=['N','M'])
+uds = dfmt.open_dataset_curvilinear(
+    file_nc_map,
+    x_dim='N',
+    y_dim='M',
+    x_bounds='grid_x',
+    y_bounds='grid_y',
+    )
 uds_sel = uds.isel(TIME=timestep,LAYER=0)
 uds_rastered = dfmt.rasterize_ugrid(uds_sel,resolution=resolution)
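For reference (derived from the function diff above, not new behavior): with these keyword arguments, open_dataset_curvilinear builds the following topology mapping before delegating to xu.UgridDataset.from_structured2d.

topology = {"mesh2d": {"x": "N",
                       "y": "M",
                       "x_bounds": "grid_x",
                       "y_bounds": "grid_y",
                       }
            }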