diff --git a/tobac/merge_split.py b/tobac/merge_split.py
new file mode 100644
index 00000000..0cbbc7e5
--- /dev/null
+++ b/tobac/merge_split.py
@@ -0,0 +1,345 @@
+# tobac merge and split v0.1
+
+from geopy.distance import geodesic  # used by the commented-out lat/lon variant below
+import networkx as nx
+import numpy as np
+from pandas.core.common import flatten
+import xarray as xr
+
+
+def compress_all(nc_grids, min_dims=2):
+    """Tag every variable of nc_grids (an xarray Dataset) that has at least
+    min_dims dimensions with zlib compression encoding, so that a subsequent
+    to_netcdf() call writes compressed output."""
+    for var in nc_grids:
+        if len(nc_grids[var].dims) >= min_dims:
+            # print("Compressing ", var)
+            nc_grids[var].encoding["zlib"] = True
+            nc_grids[var].encoding["complevel"] = 4
+            nc_grids[var].encoding["contiguous"] = False
+    return nc_grids
+
+
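+# Illustrative use of compress_all (a sketch; `both_ds` stands for any
+# xarray Dataset about to be written, as in the merge_split docstring
+# example below):
+#
+#     both_ds = compress_all(both_ds)
+#     both_ds.to_netcdf('Track_features_merges.nc')
+
+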
+def standardize_track_dataset(TrackedFeatures, Mask, Projection):
+    """Combine a feature mask with the feature data table into a common dataset.
+
+    Combines the Mask dataset returned by tobac.themes.tobac_v1.segmentation
+    with the TrackedFeatures dataset returned by tobac.themes.tobac_v1.linking_trackpy.
+
+    Also renames the variables to be more descriptive and to comply with cf-tree,
+    and converts the default cell parent ID to an integer table.
+
+    Projection is an xarray DataArray.
+
+    TODO: Add metadata attributes.
+    """
+    feature_standard_names = {
+        # new variable name, and long description for the NetCDF attribute
+        'frame': ('feature_time_index',
+            'positional index of the feature along the time dimension of the mask, from 0 to N-1'),
+        'hdim_1': ('feature_hdim1_coordinate',
+            'position of the feature along the first horizontal dimension in grid point space; a north-south coordinate for dim order (time, y, x). '
+            'The numbering is consistent with positional indexing of the coordinate, but can be '
+            'fractional, to account for a centroid not aligned to the grid.'),
+        'hdim_2': ('feature_hdim2_coordinate',
+            'position of the feature along the second horizontal dimension in grid point space; an east-west coordinate for dim order (time, y, x). '
+            'The numbering is consistent with positional indexing of the coordinate, but can be '
+            'fractional, to account for a centroid not aligned to the grid.'),
+        'idx': ('feature_id_this_frame',
+            'feature number within that frame; starts at 1, increments by 1 to the number of features for each frame, and resets to 1 when the frame increments'),
+        'num': ('feature_grid_cell_count',
+            'number of grid points that are within the threshold of this feature'),
+        'threshold_value': ('feature_threshold_max',
+            'maximum threshold value reached by the feature'),
+        'feature': ('feature_id',
+            'unique number of the feature; starts from 1 and increments by 1 to the number of features'),
+        'time': ('feature_time', 'time of the feature, consistent with feature_time_index'),
+        'timestr': ('feature_time_str', 'string representation of the feature time, YYYY-MM-DD HH:MM:SS'),
+        'projection_y_coordinate': ('feature_projection_y_coordinate', 'y position of the feature in the projection given by ProjectionCoordinateSystem'),
+        'projection_x_coordinate': ('feature_projection_x_coordinate', 'x position of the feature in the projection given by ProjectionCoordinateSystem'),
+        'lat': ('feature_latitude', 'latitude of the feature'),
+        'lon': ('feature_longitude', 'longitude of the feature'),
+        'ncells': ('feature_ncells', 'number of grid cells for this feature (meaning uncertain)'),
+        'areas': ('feature_area',),
+        'isolated': ('feature_isolation_flag',),
+        'num_objects': ('number_of_feature_neighbors',),
+        'cell': ('feature_parent_cell_id',),
+        'time_cell': ('feature_parent_cell_elapsed_time',),
+        'segmentation_mask': ('2d segmentation mask',)
+    }
+    new_feature_var_names = {k: feature_standard_names[k][0] for k in feature_standard_names.keys()
+                             if k in TrackedFeatures.variables.keys()}
+
+    TrackedFeatures = TrackedFeatures.drop(['cell_parent_track_id'])
+    # Combine Track and Mask variables. Use the 'feature' variable as the coordinate variable
+    # instead of the 'index' variable, and call the dimension 'feature'.
+    ds = xr.merge([TrackedFeatures.swap_dims({'index': 'feature'}).drop('index').rename_vars(new_feature_var_names),
+                   Mask])
+
+    # Add the projection data back in
+    ds['ProjectionCoordinateSystem'] = Projection
+
+    # Convert the cell ID variable from float to integer
+    if 'int' not in str(TrackedFeatures.cell.dtype):
+        # The raw output from the tracking is actually an object array
+        # array([nan, 2, 3], dtype=object)
+        # (and is cast to a float array when saved as NetCDF, I think).
+        # Cast to int64, using zero for untracked features.
+        int_cell = xr.zeros_like(TrackedFeatures.cell, dtype='int64')
+
+        cell_id_data = TrackedFeatures.cell.astype('float64')
+        valid_cell = np.isfinite(cell_id_data)
+        valid_cell_ids = cell_id_data[valid_cell]
+        if not (np.unique(valid_cell_ids) > 0).all():
+            raise AssertionError('Lowest cell ID is less than one, conflicting with use of zero to indicate an untracked cell')
+        int_cell[valid_cell] = valid_cell_ids.astype('int64')
+        # ds['feature_parent_cell_id'] = int_cell
+    return ds
+
+
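+# Illustrative use of standardize_track_dataset (a sketch; `Track`,
+# `refl_mask`, and `data` are assumed to be the tobac tracking output, the
+# segmentation mask, and the original gridded dataset, matching the
+# merge_split docstring example below):
+#
+#     ds = standardize_track_dataset(Track, refl_mask, data['ProjectionCoordinateSystem'])
+
+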
+def merge_split(TRACK, distance=25000, frame_len=5):
+    '''
+    Function to postprocess tobac track data for merge/split cells.
+
+    Input:
+    TRACK:     xarray dataset of tobac Track information
+
+    distance:  float, optional. Distance threshold (in meters) below which the
+               end point of one cell and the start point of another are added
+               as an edge of the minimum spanning tree.
+               Default is 25000 meters.
+
+    frame_len: float, optional. Maximum number of frames by which the end of
+               one cell and the start of another may be separated for their
+               spanning-tree edge to be kept. Default is five (5) frames.
+
+    Output:
+    d:         xarray dataset of the merge/split hierarchy: storm, track,
+               cell, and feature IDs, together with their parent/child
+               relationships and child counts.
+
+    Example:
+    d = merge_split(Track)
+    ds = standardize_track_dataset(Track, refl_mask, data['ProjectionCoordinateSystem'])
+    # both_ds = xarray.combine_by_coords((ds, d), compat='override')
+    both_ds = xr.merge([ds, d], compat='override')
+    both_ds = compress_all(both_ds)
+    both_ds.to_netcdf(os.path.join(savedir, 'Track_features_merges.nc'))
+    '''
+    track_groups = TRACK.groupby('cell')
+    cell_ids = {cid: len(v) for cid, v in track_groups.groups.items()}
+    id_data = np.fromiter(cell_ids.keys(), dtype=int)
+    count_data = np.fromiter(cell_ids.values(), dtype=int)
+    all_frames = np.sort(np.unique(TRACK.frame))
+    a_points = list()
+    b_points = list()
+    a_names = list()
+    b_names = list()
+    dist = list()
+
+    # Connect the last point of each cell to the first point of every cell
+    # that lies within the distance threshold.
+    for i in id_data:
+        a_pointx = track_groups[i].projection_x_coordinate[-1].values
+        a_pointy = track_groups[i].projection_y_coordinate[-1].values
+        for j in id_data:
+            b_pointx = track_groups[j].projection_x_coordinate[0].values
+            b_pointy = track_groups[j].projection_y_coordinate[0].values
+            d = np.linalg.norm(np.array((a_pointx, a_pointy)) - np.array((b_pointx, b_pointy)))
+            if d <= distance:
+                a_points.append([a_pointx, a_pointy])
+                b_points.append([b_pointx, b_pointy])
+                dist.append(d)
+                a_names.append(i)
+                b_names.append(j)
+
+    # Lat/lon variant of the loop above, using geodesic distance in km:
+    # for i in id_data:
+    #     a_pointx = track_groups[i].grid_longitude[-1].values
+    #     a_pointy = track_groups[i].grid_latitude[-1].values
+    #     for j in id_data:
+    #         b_pointx = track_groups[j].grid_longitude[0].values
+    #         b_pointy = track_groups[j].grid_latitude[0].values
+    #         d = geodesic((a_pointy, a_pointx), (b_pointy, b_pointx)).km
+    #         if d <= distance:
+    #             a_points.append([a_pointx, a_pointy])
+    #             b_points.append([b_pointx, b_pointy])
+    #             dist.append(d)
+    #             a_names.append(i)
+    #             b_names.append(j)
+
+    # Remove self-connections (a cell linked to itself).
+    for i in range(len(dist) - 1, -1, -1):
+        if a_names[i] == b_names[i]:
+            a_points.pop(i)
+            b_points.pop(i)
+            dist.pop(i)
+            a_names.pop(i)
+            b_names.pop(i)
+
+    g = nx.Graph()
+    for i in np.arange(len(dist)):
+        g.add_edge(a_names[i], b_names[i], weight=dist[i])
+
+    tree_list = list(nx.minimum_spanning_edges(g))
+
+    # Keep only spanning-tree edges whose endpoints are close enough in time.
+    new_tree = []
+    for i, j in enumerate(tree_list):
+        frame_a = np.nanmax(track_groups[j[0]].frame.values)
+        frame_b = np.nanmin(track_groups[j[1]].frame.values)
+        if np.abs(frame_a - frame_b) <= frame_len:
+            new_tree.append(tree_list[i][0:2])
+    new_tree_arr = np.array(new_tree)
+
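+    # Sketch of the spanning-tree step on a toy graph (illustrative only):
+    # if cells 1-2, 2-3, and 1-3 all lie within `distance`, with edge weights
+    # 10, 20, and 40, nx.minimum_spanning_edges keeps (1, 2) and (2, 3) and
+    # drops the redundant, heavier (1, 3) edge, so each merge/split
+    # connection appears exactly once; the frame_len check above then prunes
+    # edges whose cells are too far apart in time.
+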
+    TRACK['cell_parent_track_id'] = np.zeros(len(TRACK['cell'].values))
+    cell_id = np.unique(TRACK.cell.values.astype(float)[~np.isnan(TRACK.cell.values.astype(float))])
+    track_id = dict()  # same size as number of total merged tracks
+
+    # Group cells into tracks: repeatedly expand each cell's set of
+    # spanning-tree neighbours until it stops growing, i.e. collect the
+    # connected component of new_tree_arr that contains the cell.
+    arr = np.array([0])
+    for p in cell_id:
+        j = np.where(arr == int(p))
+        if len(j[0]) > 0:
+            continue
+        else:
+            k = np.where(new_tree_arr == p)
+            if len(k[0]) == 0:
+                track_id[p] = [p]
+                arr = np.append(arr, p)
+            else:
+                temp1 = list(np.unique(new_tree_arr[k[0]]))
+                temp = list(np.unique(new_tree_arr[k[0]]))
+
+                for l in range(15):  # ad hoc iteration cap for the expansion
+                    for i in temp1:
+                        k2 = np.where(new_tree_arr == i)
+                        temp.append(list(np.unique(new_tree_arr[k2[0]]).squeeze()))
+                    temp = list(flatten(temp))
+                    temp = list(np.unique(temp))
+
+                    if len(temp1) == len(temp):
+                        break
+                    temp1 = np.array(temp)
+
+                for i in temp1:
+                    k2 = np.where(new_tree_arr == i)
+                    temp.append(list(np.unique(new_tree_arr[k2[0]]).squeeze()))
+
+                temp = list(flatten(temp))
+                temp = list(np.unique(temp))
+                arr = np.append(arr, np.unique(temp))
+
+                track_id[np.nanmax(np.unique(temp))] = list(np.unique(temp))
+
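+    # Equivalent formulation (illustrative; not the code path used above):
+    # the loop computes the connected components of the pruned spanning
+    # tree, so the same grouping could be written as
+    #
+    #     g2 = nx.Graph()
+    #     g2.add_edges_from(map(tuple, new_tree))
+    #     components = list(nx.connected_components(g2))
+    #
+    # with each component forming one merged track, and cells that appear in
+    # no edge forming single-cell tracks of their own.
+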
+    storm_id = [0]  # default because we don't track larger storm systems *yet*
+    print('found storm id')
+
+    track_parent_storm_id = np.repeat(0, len(track_id))  # always zero until we track larger storm systems
+    print('found track parent storm ids')
+
+    track_ids = np.array(list(track_id.keys()))
+    print('found track ids')
+
+    cell_id = list(np.unique(TRACK.cell.values.astype(float)[~np.isnan(TRACK.cell.values.astype(float))]))
+    print('found cell ids')
+
+    cell_parent_track_id = []
+    for i, id in enumerate(track_id):
+        if len(track_id[int(id)]) == 1:
+            cell_parent_track_id.append(int(id))
+        else:
+            cell_parent_track_id.append(np.repeat(int(id), len(track_id[int(id)])))
+
+    cell_parent_track_id = list(flatten(cell_parent_track_id))
+    print('found cell parent track ids')
+
+    feature_parent_cell_id = list(TRACK.cell.values.astype(float))
+    print('found feature parent cell ids')
+
+    # This version includes all features, regardless of whether they are used in cells.
+    feature_id = list(TRACK.feature.values.astype(int))
+    print('found feature ids')
+
+    feature_parent_storm_id = np.repeat(0, len(feature_id))  # we don't do storms atm
+    print('found feature parent storm ids')
+
+    feature_parent_track_id = np.zeros(len(feature_id))
+    for i, id in enumerate(feature_id):
+        cellid = feature_parent_cell_id[i]
+        if np.isnan(cellid):
+            feature_parent_track_id[i] = -1
+        else:
+            j = np.where(np.array(cell_id) == cellid)  # compare against an array, not a list
+            j = np.squeeze(j)
+            trackid = cell_parent_track_id[j]
+            feature_parent_track_id[i] = trackid
+    print('found feature parent track ids')
+
+    storm_child_track_count = [len(track_id)]
+    print('found storm child track count')
+
+    track_child_cell_count = []
+    for i, id in enumerate(track_id):
+        track_child_cell_count.append(len(track_id[int(id)]))
+    print('found track child cell count')
+
+    cell_child_feature_count = []
+    for i, id in enumerate(cell_id):
+        cell_child_feature_count.append(len(track_groups[id].feature.values))
+    print('found cell child feature count')
+
+    storm_child_cell_count = [len(cell_id)]
+    storm_child_feature_count = [len(feature_id)]
+
+    storm_dim = 'nstorms'
+    track_dim = 'ntracks'
+    cell_dim = 'ncells'
+    feature_dim = 'nfeatures'
+
+    d = xr.Dataset({
+        'storm_id': (storm_dim, storm_id),
+        'track_id': (track_dim, track_ids),
+        'track_parent_storm_id': (track_dim, track_parent_storm_id),
+        'cell_id': (cell_dim, cell_id),
+        'cell_parent_track_id': (cell_dim, cell_parent_track_id),
+        'feature_id': (feature_dim, feature_id),
+        'feature_parent_cell_id': (feature_dim, feature_parent_cell_id),
+        'feature_parent_track_id': (feature_dim, feature_parent_track_id),
+        'feature_parent_storm_id': (feature_dim, feature_parent_storm_id),
+        'storm_child_track_count': (storm_dim, storm_child_track_count),
+        'storm_child_cell_count': (storm_dim, storm_child_cell_count),
+        'storm_child_feature_count': (storm_dim, storm_child_feature_count),
+        'track_child_cell_count': (track_dim, track_child_cell_count),
+        'cell_child_feature_count': (cell_dim, cell_child_feature_count),
+    })
+    d = d.set_coords(['feature_id', 'cell_id', 'track_id', 'storm_id'])
+
+    # Consistency checks between the parent/child mappings.
+    assert len(track_id) == len(track_parent_storm_id)
+    assert len(cell_id) == len(cell_parent_track_id)
+    assert len(feature_id) == len(feature_parent_cell_id)
+    assert sum(storm_child_track_count) == len(track_id)
+    assert sum(storm_child_cell_count) == len(cell_id)
+    assert sum(storm_child_feature_count) == len(feature_id)
+    assert sum(track_child_cell_count) == len(cell_id)
+    assert sum([sum(cell_child_feature_count), len(np.where(feature_parent_track_id < 0)[0])]) == len(feature_id)
+
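+    # Reading the result (a sketch): for a feature at positional index i,
+    # the parent chain can be followed through the returned dataset, e.g.
+    #
+    #     parent_cell = d['feature_parent_cell_id'][i]
+    #     parent_track = d['feature_parent_track_id'][i]  # -1 if untracked
+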
+    return d
\ No newline at end of file
diff --git a/tobac/segmentation.py b/tobac/segmentation.py
index c09b1c21..7719bc09 100644
--- a/tobac/segmentation.py
+++ b/tobac/segmentation.py
@@ -7,7 +7,7 @@ def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=Non
     return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance)
 
-def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'):
+def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',ISO_dilate=8):
     """
     Function performing watershedding for an individual timestep of the data
 
@@ -25,13 +25,15 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     method: string
             flag determining the algorithm to use (currently watershedding implemented)
     max_distance: float
-                  maximum distance from a marker allowed to be classified as belonging to that cell
+                  maximum distance from a marker allowed to be classified as belonging to that cell
+    ISO_dilate: int
+                radius (in grid points) of the disk used to dilate each feature's segmentation mask for the isolation test. Default is 8.
 
     Output:
     segmentation_out: iris.cube.Cube
-                      cloud mask, 0 outside and integer numbers according to track inside the clouds
+                      cloud mask, 0 outside and integer numbers according to track inside the clouds
     features_out: pandas.DataFrame
-                  feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep
+                  feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep
     """
     # The location of watershed within skimage submodules changes with v0.19, but I've kept both for backward compatibility for now
     try:
@@ -40,6 +42,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
         from skimage.morphology import watershed
     # from skimage.segmentation import random_walker
     from scipy.ndimage import distance_transform_edt
+    from skimage.morphology import dilation, disk
     from copy import deepcopy
     import numpy as np
 
@@ -120,6 +123,33 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     else:
         raise ValueError('unknown method, must be watershed')
 
+    # create isolation flag; currently only available for 2D tracking
+    if field_in.ndim==2:
+        nobj = features_in['feature'].values
+        nobj_len = len(nobj)
+        iso = np.zeros(nobj_len, dtype='bool')
+        num_obj_around = np.zeros(nobj_len, dtype='int')
+        # dilate the labeled mask by a disk (formerly a square) of radius ISO_dilate
+        segmask2 = dilation(segmentation_mask, disk(ISO_dilate))
+        for iso_id in np.arange(nobj_len):
+            if iso_id == 0:
+                continue
+            obj_ind = np.where(segmask2 == nobj[iso_id])
+            objects = np.unique(segmentation_mask[obj_ind])
+            # one label will always be 0 (background) and one the feature itself;
+            # any labels beyond those two are neighbouring features
+            if len(objects) >= 3:
+                iso[iso_id] = True
+                num_obj_around[iso_id] = len(objects) - 1  # subtract the 0 element included in the count
+
+        features_out['isolated'] = iso
+        features_out['num_objects'] = num_obj_around
+
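+    # Worked illustration of the isolation test above (a sketch; assumes
+    # pixels away from the array border): for a row of labels [1,1,1,0,2]
+    # with ISO_dilate=2, the dilated footprint of feature 2 reaches a pixel
+    # of feature 1, so np.unique(...) returns [0, 1, 2] and feature 2 is
+    # flagged with num_obj_around = 2. Because grayscale dilation takes the
+    # maximum label, the test is one-sided: feature 1's footprint is
+    # overwritten by the larger label and feature 1 itself is not flagged.
+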
     # remove everything from the individual masks that is more than max_distance_pixel away from the markers
     if max_distance is not None:
         D=distance_transform_edt((markers==0).astype(int))
@@ -139,7 +169,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
     return segmentation_out,features_out
 
-def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'):
+def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',ISO_dilate=8):
     """
     Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts
 
@@ -162,6 +192,11 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m
     max_distance: float
                   Maximum distance from a marker allowed to be classified as belonging to that cell
+    ISO_dilate: int
+                radius (in grid points) of the disk used to dilate each feature's
+                segmentation mask for the isolation test. Default is 8.
+
+
 
     Output:
     segmentation_out: iris.cube.Cube
                       Cloud mask, 0 outside and integer numbers according to track inside the cloud
@@ -169,6 +204,10 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m
     import pandas as pd
     from iris.cube import CubeList
 
+    features['isolated'] = None
+    features['num_objects'] = None
+    print('ISO_dilate: ' + str(ISO_dilate))
+
     logging.info('Start watershedding 3D')
 
     # check input for right dimensions:
@@ -186,7 +225,7 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m
     for i,field_i in enumerate(field_time):
         time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0])
         features_i=features.loc[features['time']==time_i]
-        segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord)
+        segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,ISO_dilate=ISO_dilate)
         segmentation_out_list.append(segmentation_out_i)
         features_out_list.append(features_out_i)
         logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S'))