From c03f4a646a9264a421d5a6877ae86c1f4f1350af Mon Sep 17 00:00:00 2001
From: Victor Perez Meza
Date: Fri, 10 Jan 2025 14:00:21 +0100
Subject: [PATCH] added exceptions script to better handle missing channels

---
 macsima2mc/CLI.py               |   4 +-
 macsima2mc/exceptions.py        |  92 ++++++++++++++++++++
 macsima2mc/illumination_corr.py |  11 ++-
 macsima2mc/macsima2mc.py        |   3 +-
 macsima2mc/ome_schema.py        |  15 ++--
 macsima2mc/tools.py             | 146 ++++++++++++++++++++------------
 6 files changed, 206 insertions(+), 65 deletions(-)
 create mode 100644 macsima2mc/exceptions.py

diff --git a/macsima2mc/CLI.py b/macsima2mc/CLI.py
index 8d09b8b..5a80a7e 100644
--- a/macsima2mc/CLI.py
+++ b/macsima2mc/CLI.py
@@ -24,13 +24,13 @@ def get_args():
                         type=pathlib.Path,
                         help='Path where the stacks will be saved. If directory does not exist it will be created.'
                         )
-    
+
     #Optional arguments
     parser.add_argument('-rm',
                         '--reference_marker',
                         default='DAPI',
                         help='string specifying the name of the reference marker'
                         )
-    
+
     parser.add_argument('-od',
                         '--output_dir',
                         default='raw',
diff --git a/macsima2mc/exceptions.py b/macsima2mc/exceptions.py
new file mode 100644
index 0000000..391ba83
--- /dev/null
+++ b/macsima2mc/exceptions.py
@@ -0,0 +1,92 @@
+import numpy as np
+import pandas as pd
+
+
+def any_ref(mf_tuple,
+            ref_marker='DAPI'):
+    """
+    This function checks if the reference marker is present in the mf_tuple.
+    Args:
+        mf_tuple (tuple): tuple with the markers and filters
+        ref_marker (str): reference marker used for registration
+    Returns:
+        bool: True if the reference marker is present in the mf_tuple, False otherwise.
+    """
+
+    exist_ref = False
+    for m in mf_tuple:
+        if m[0] == ref_marker:
+            exist_ref = True
+            break
+    return exist_ref
+
+def miss_channel(chann_index,chans_meta,conformed_markers):
+
+    if chans_meta[ chann_index ].size:
+        pass
+    else:
+
+        ref_index=np.argwhere([meta.size > 0 for meta in chans_meta ]).flatten()[0]
+        chann_meta=chans_meta[ ref_index ]
+        edit_feats={'marker':conformed_markers[ chann_index ][0],
+                    'filter':conformed_markers[ chann_index ][1]}
+        # TODO: decide whether an 'exposure' entry should also be filled in here
+        # note: conformed_markers is the list of (marker, filter) pairs built by tools.conform_markers
+    return True
+
+
+
+def at_roi(grouped,dimensions,ref_marker):
+    #the last element of dimensions is expected to be the exposure_level
+    rows=[]
+    for index,frame in grouped:
+        marker_filter_map = list( frame.groupby(["marker","filter"]).indices.keys() )
+        ref_exists=any_ref(marker_filter_map,ref_marker)
+        subst_exp_level=index[-1]
+        rows.append( list(index) + [ ref_exists, subst_exp_level ] )
+
+    df=pd.DataFrame(rows,columns=dimensions+['ref_marker','aux_exp_level'])
+
+    frames=[]
+    for index,frame in df.groupby(dimensions[:-1]):
+        substitute=frame.loc[ frame['ref_marker']==True,'exposure_level'].max()
+        frame['aux_exp_level']=frame['aux_exp_level'].where(frame['ref_marker'],substitute)
+        frames.append(frame)
+
+    return pd.concat(frames)
+
+def at_tile(group,marker_filter_map):
+    rows=[]
+    for tile_id,frame in group.groupby(['tile']):
+        aux_array = [ frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)].size
+                      for marker, filt in marker_filter_map
+                    ]
+
+        aux_array =np.array(aux_array)==0
+        missing_channel=np.any(aux_array)
+
+        if missing_channel:
+            channel_indices=np.argwhere(aux_array).flatten()
+            #Each missing (marker,filter) pair is separated by a colon ":", e.g. marker1,filter1:marker2,filter2
+            channel= ':'.join( [ ','.join(marker_filter_map[i]) for i in channel_indices] )
+        else:
+            channel=''
+
+        qc_data=[tile_id[0],missing_channel,channel,tile_id[0]]
+        cols=['tile','missing_ch','channels','aux_tile']
+        rows.append( qc_data )
+
+    df=pd.DataFrame(rows,columns=cols)
+    auxiliary_tile_index=df.loc[df['missing_ch']==False ][['tile']].values[0]
+    df.loc[ df['missing_ch']==True, ['aux_tile']]=auxiliary_tile_index
+
+    return df
+
+
+
+
+
+
+
+
+
diff --git a/macsima2mc/illumination_corr.py b/macsima2mc/illumination_corr.py
index 735b8eb..8863519 100644
--- a/macsima2mc/illumination_corr.py
+++ b/macsima2mc/illumination_corr.py
@@ -28,6 +28,13 @@ def extract_channel_imgs (stack,indices):

     return stack[indices,:,:]

+def filter_out_blanks(channel_tiles):
+    """Drop all-blank (all-zero) tiles so they do not bias the flat-field estimation."""
+    signal_indices=[index for index in range(0,channel_tiles.shape[0]) if np.sum(channel_tiles[index,:,:])>0 ]
+    filtered_tiles=channel_tiles[signal_indices,:,:]
+    return filtered_tiles
+
+
 def apply_corr(uncorr_stack,no_of_channels):

     """
@@ -46,7 +53,9 @@ def apply_corr(uncorr_stack,no_of_channels):

     for ind_list in indices:
         uncorr_imgs = extract_channel_imgs(uncorr_stack, ind_list)
-        basic.fit(uncorr_imgs)
+        imgs_for_ffp=filter_out_blanks(uncorr_imgs)
+        #basic.fit(uncorr_imgs)
+        basic.fit(imgs_for_ffp)
         ffp = basic.flatfield
         corr_stack[ind_list,:,:] = np.uint16(np.clip(uncorr_imgs.astype(float) / ffp, 0, 65535))

diff --git a/macsima2mc/macsima2mc.py b/macsima2mc/macsima2mc.py
index ae1160e..44d94fc 100644
--- a/macsima2mc/macsima2mc.py
+++ b/macsima2mc/macsima2mc.py
@@ -22,7 +22,8 @@ def main():

     # Create stack
     cycle_info = tools.append_metadata( cycle_info )
-    #cycle_info.to_csv( args.output / 'cycle_{c}_info.csv'.format(c=f'{6:03d}'), index=False )
+
+    #cycle_info.to_csv( args.output / 'cycle_{c}_info.csv'.format(c=f'{1:03d}'), index=False )
     output_dirs = tools.create_stack(cycle_info,
                                      output,
                                      ref_marker=ref,
diff --git a/macsima2mc/ome_schema.py b/macsima2mc/ome_schema.py
index 0ed8966..0ca499e 100644
--- a/macsima2mc/ome_schema.py
+++ b/macsima2mc/ome_schema.py
@@ -5,6 +5,7 @@ import platform



+
 def INPUTS(frame, conformed_markers):
     """
     This function creates a dictionary with the metadata of the tiles.
@@ -15,12 +16,14 @@ def INPUTS(frame, conformed_markers):
         dict: dictionary with the metadata of the tiles.
     """

-    features = frame.columns.values
-    inputs = {column:[] for column in features }
-    metadata = [frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker, filt in conformed_markers]
-    for meta in metadata:
-        for f in features:
-            inputs[f].append(meta[f].values[0])
+    #features = frame.columns.values
+    #inputs = {column:[] for column in features }
+    #metadata = [frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker, filt in conformed_markers]
+    #for meta in metadata:
+    #    for f in features:
+    #        inputs[f].append(meta[f].values[0])
+    inputs=frame.to_dict('list')
+
     return inputs


diff --git a/macsima2mc/tools.py b/macsima2mc/tools.py
index 9ec116d..5a0e3d7 100644
--- a/macsima2mc/tools.py
+++ b/macsima2mc/tools.py
@@ -1,5 +1,4 @@
 from tkinter.font import names
-
 from templates import info_dic
 import re
 import pandas as pd
@@ -8,6 +7,7 @@ import numpy as np
 from pathlib import Path
 import ome_writer
+import exceptions as expt
 import illumination_corr



@@ -15,7 +15,7 @@ def merge_dicts(list_of_dicts):
     """
     This function merges a list of dictionaries into a single dictionary where the values are stored in lists.
     Args:
-        list_of_dicts (list): list of dictionaries to merge
+        list_of_dicts (list): list of dictionaries with common keys
     Returns:
         merged_dict (dict): dictionary with the values stored in lists
     """
@@ -147,28 +147,8 @@ def conform_markers(mf_tuple,
     markers.insert(0,(ref_marker,ref_marker))
     return markers

-def any_ref(mf_tuple,
-            ref_marker='DAPI'):
-    """
-    This function checks if the reference marker is present in the mf_tuple.
-    Args:
-        mf_tuple (tuple): tuple with the markers and filters
-        ref_marker (str): reference marker used for registration
-    Returns:
-        bool: True if the reference marker is present in the mf_tuple, False otherwise.
-    """
-
-    exist_ref = False
-    for m in mf_tuple:
-        if m[0] == ref_marker:
-            exist_ref = True
-            break
-    return exist_ref
-
-
-def init_stack(ref_tile_index,
-               groupby_obj,
-               marker_filter_map):
+def init_stack(group,
+               no_of_channels):
     """
     This function initializes the stack array with the dimensions of the tiles.
     Args:
@@ -179,12 +159,15 @@
         np.ndarray: array with the dimensions of the stack array (depth, height, width) and the dtype of
         the reference tile.
     """
-    ref_tile = groupby_obj.get_group((ref_tile_index,))
-    total_tiles = len(groupby_obj)
-    width = ref_tile.size_x.values[0]
-    height = ref_tile.size_y.values[0]
-    depth = total_tiles*len(marker_filter_map)
-    stack = np.zeros( (depth,int(height),int(width)), dtype=ref_tile.type.values[0])
+    aux_array=[ group['size_x'].unique() , group['size_y'].unique(), group['type'].unique() ]
+    check_array=np.array( [ len(element) for element in aux_array ] )
+    if np.any(check_array>1):
+        print("Warning: tiles of this acquisition do not have a unique value for the following columns: xy-size or data type")
+    width, height, data_type = [ element[0] for element in aux_array ]
+    total_tiles = group['tile'].nunique()
+    depth = total_tiles * no_of_channels
+
+    stack = np.zeros( (depth,int(height),int(width)), dtype=data_type)

     return stack

@@ -257,7 +240,7 @@ def outputs_dic():


 def select_by_exposure(list_indices,
-                       exp_index=4,
+                       exp_index,
                        target='max'):
     """
     This function selects the indices with the maximum or minimum exposure time.
@@ -280,6 +263,45 @@ def select_by_exposure(list_indices,

     return selected_indices

+def append_reference(frame,frame_index,groups,aux_reference_exp_level,ref_marker_name):
+
+    aux_index = list(frame_index)
+    aux_index[-1] = aux_reference_exp_level
+    aux_index = tuple(aux_index)
+    aux_group = groups.get_group(aux_index)
+    aux_group = aux_group.loc[aux_group['marker']==ref_marker_name]
+    frame_ = pd.concat([frame, aux_group])
+    return frame_
+
+def append_missing_channels(group, exception_table ):
+    #exception_table.cols=["tile","missing_ch","channels","aux_tile"]
+    tiles=group.groupby(['tile'])
+    incomplete_tiles=exception_table.loc[exception_table['missing_ch']==True ]
+    add_tiles=[]
+    for row in incomplete_tiles.itertuples(index=False):
+        aux_tile_df=tiles.get_group( (row.aux_tile,) )
+        missing_markers=[ tuple(element.split(',')) for element in row.channels.split(':') ]
+        for marker, filt in missing_markers:
+            df=aux_tile_df.loc[ (aux_tile_df['marker']==marker) & (aux_tile_df['filter']==filt) ]
+            #df['tile']=row.tile
+            df.loc[:,'tile']=row.tile
+            add_tiles.append(df)
+
+    aux_group=pd.concat(add_tiles)
+    aux_group[ ['full_path','img_name'] ]='missing'
+    group_=pd.concat([group,aux_group])
+
+    return group_
+
+def conform_acquisition_group(group,conformed_markers):
+    aux=[]
+    for tile_id,frame in group.groupby(['tile']):
+        aux.extend([ frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker, filt in conformed_markers ])
+    group_conformed=pd.concat(aux)
+
+    return group_conformed
+
+

 def create_stack(cycle_info_df,
                  output_dir,
@@ -307,42 +329,56 @@ def create_stack(cycle_info_df,
     else:
         out = {'output_paths':[]}

-    acq_group = cycle_info_df.groupby(['source','rack','well','roi','exposure_level'])
+    #'exposure_level' should always be the last element of the dimensions list
+    dimensions=['source','rack','well','roi','exposure_level']
+
+    acq_group = cycle_info_df.groupby(dimensions)
     acq_index = list( acq_group.indices.keys() )
+    expt_matrix_roi=expt.at_roi(acq_group,dimensions,ref_marker).groupby(dimensions)#exceptions matrix

     if hi_exp:
-        acq_index = select_by_exposure(acq_index)
+        exp_level_index=np.argwhere( np.asarray(dimensions)=='exposure_level' ).flatten()[0]
+        acq_index = select_by_exposure(acq_index,exp_level_index,target='max')

     for index in acq_index:
         stack_output_dir = output_dir / cast_outdir_name(index) / out_folder
         stack_output_dir.mkdir(parents=True, exist_ok=True)
         group = acq_group.get_group(index)
-        #use tile 1 as reference to determine the height and width of the tiles
-        tile_no = group.tile.values
-        ref_tile = group.groupby(['tile']).get_group((tile_no[0],))
-        marker_filter_map = list(ref_tile.groupby(["marker","filter"]).indices.keys())
-        exist_ref = any_ref(marker_filter_map,ref_marker)
+        exist_ref, aux_reference= expt_matrix_roi.get_group(index )[ ['ref_marker','aux_exp_level'] ].iloc[0].values

         if not exist_ref:
-            index_aux = list(index)
-            index_aux[-1] = 1
-            index_aux = tuple(index_aux)
-            aux_group = acq_group.get_group(index_aux)
-            aux_group = aux_group.loc[aux_group['marker']==ref_marker]
-            group = pd.concat([group, aux_group])
-
-        #group.to_csv(stack_output_dir.parent.absolute() /'info.csv' )
-        groups_of_tiles = group.groupby(['tile'])
+            group=append_reference(group, index, acq_group, aux_reference, ref_marker)
+        #extract the list of unique (marker,filter) pairs
+        marker_filter_map=group[['marker','filter']].value_counts().index.values
+        #conform the (marker,filter) pairs so that the reference marker occupies the first position (1st channel) of the list
         conformed_markers = conform_markers(marker_filter_map, ref_marker)
-        stack = init_stack(tile_no[0], groups_of_tiles, conformed_markers)
-        ome = ome_writer.create_ome(group, conformed_markers)
+
+        expt_matrix_tiles=expt.at_tile(group,conformed_markers)
+        if expt_matrix_tiles['missing_ch'].any():
+            group=append_missing_channels(group,expt_matrix_tiles)
+
+        stack = init_stack(group, len( conformed_markers))
+        conformed_group=conform_acquisition_group(group,conformed_markers)
+        #ome = ome_writer.create_ome(group, conformed_markers)
+        ome = ome_writer.create_ome(conformed_group, conformed_markers)
         counter = 0
-
-        for tile_no, frame in groups_of_tiles:
-            for marker, filter in conformed_markers:
-                target_path = frame.loc[ (frame['marker']==marker) & (frame['filter']==filter) ].full_path.values[0]
-                stack[counter,:,:] = tifff.imread(Path(target_path))
+        groups_of_tiles = conformed_group.groupby(['tile'])
+        #for tile_no, frame in groups_of_tiles:
+        #    for marker, filter in conformed_markers:
+        #        target_path = frame.loc[ (frame['marker']==marker) & (frame['filter']==filter) ].full_path.values[0]
+        #        stack[counter,:,:] = tifff.imread(Path(target_path))
+        #        counter += 1
+        for tile_id,frame in groups_of_tiles:
+            for img_path in frame['full_path'].values:
+                try:
+                    stack[counter,:,:] = tifff.imread(Path(img_path))
+                except FileNotFoundError:
+                    if img_path!='missing':  #only the 'missing' placeholder set by append_missing_channels is tolerated; its slice stays as zeros
+                        raise
                 counter += 1
+
+
+
         stack_name = cast_stack_name(frame.cycle.iloc[0], index, conformed_markers)

         if ill_corr:
@@ -352,7 +388,7 @@
         else:
             tag = ''

-        stack_file_path = stack_output_dir/ f'{tag}{stack_name}'
+        stack_file_path = stack_output_dir / f'{tag}{stack_name}'

         if extended_outputs:
             out['index'].append(index)
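
Not part of the diff: a minimal sketch of how the new exceptions.at_tile helper flags tiles with missing channels, to make the QC table consumed by append_missing_channels concrete. The tile numbers, markers and filters below are illustrative only, and the snippet assumes macsima2mc/exceptions.py from this patch is on the import path and a pandas version in which groupby(['tile']) yields tuple keys, as the patched code expects.

import pandas as pd
import exceptions as expt  # macsima2mc/exceptions.py added by this patch

# toy cycle-info table: tile 2 lacks the (CD45, FITC) acquisition
cycle_info = pd.DataFrame({
    'tile':   [1, 1, 2],
    'marker': ['DAPI', 'CD45', 'DAPI'],
    'filter': ['DAPI', 'FITC', 'DAPI'],
})
conformed_markers = [('DAPI', 'DAPI'), ('CD45', 'FITC')]

qc = expt.at_tile(cycle_info, conformed_markers)
print(qc)
# tile 2 is reported with missing_ch=True, channels='CD45,FITC' and aux_tile=1,
# i.e. the complete tile whose rows append_missing_channels copies with full_path='missing'.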