From c03f4a646a9264a421d5a6877ae86c1f4f1350af Mon Sep 17 00:00:00 2001
From: Victor Perez Meza
Date: Fri, 10 Jan 2025 14:00:21 +0100
Subject: [PATCH] added exceptions script to better handle missing channels

---
 macsima2mc/CLI.py               |   4 +-
 macsima2mc/exceptions.py        |  92 ++++++++++++++++++++
 macsima2mc/illumination_corr.py |  11 ++-
 macsima2mc/macsima2mc.py        |   3 +-
 macsima2mc/ome_schema.py        |  15 ++--
 macsima2mc/tools.py             | 146 ++++++++++++++++++++------------
 6 files changed, 206 insertions(+), 65 deletions(-)
 create mode 100644 macsima2mc/exceptions.py

diff --git a/macsima2mc/CLI.py b/macsima2mc/CLI.py
index 8d09b8b..5a80a7e 100644
--- a/macsima2mc/CLI.py
+++ b/macsima2mc/CLI.py
@@ -24,13 +24,13 @@ def get_args():
                         type=pathlib.Path,
                         help='Path where the stacks will be saved. If directory does not exist it will be created.'
                         )
-    
+
     #Optional arguments
     parser.add_argument('-rm',
                         '--reference_marker',
                         default='DAPI',
                         help='string specifying the name of the reference marker'
                         )
-    
+
     parser.add_argument('-od',
                         '--output_dir',
                         default='raw',
diff --git a/macsima2mc/exceptions.py b/macsima2mc/exceptions.py
new file mode 100644
index 0000000..391ba83
--- /dev/null
+++ b/macsima2mc/exceptions.py
@@ -0,0 +1,92 @@
+import numpy as np
+import pandas as pd
+
+
+def any_ref(mf_tuple,
+            ref_marker='DAPI'):
+    """
+    This function checks if the reference marker is present in the mf_tuple.
+    Args:
+        mf_tuple (tuple): tuple with the markers and filters
+        ref_marker (str): reference marker used for registration
+    Returns:
+        bool: True if the reference marker is present in the mf_tuple, False otherwise.
+    """
+
+    exist_ref = False
+    for m in mf_tuple:
+        if m[0] == ref_marker:
+            exist_ref = True
+            break
+    return exist_ref
+
+def miss_channel(chann_index,chans_meta,conformed_markers):
+
+    if chans_meta[ chann_index ].size:
+        pass
+    else:
+
+        ref_index=np.argwhere([meta.size > 0 for meta in chans_meta ]).flatten()[0]
+        chann_meta=chans_meta[ ref_index ]
+        edit_feats={'marker':conformed_markers[ chann_index ][0],
+                    'filter':conformed_markers[ chann_index ][1]}
+        # TODO: decide whether an 'exposure' entry should also be filled in here
+        # note: conformed_markers is the list of (marker, filter) pairs built by tools.conform_markers
+    return True
+
+
+
+def at_roi(grouped,dimensions,ref_marker):
+    #the last element of dimensions is expected to be the exposure_level
+    rows=[]
+    for index,frame in grouped:
+        marker_filter_map = list( frame.groupby(["marker","filter"]).indices.keys() )
+        ref_exists=any_ref(marker_filter_map,ref_marker)
+        subst_exp_level=index[-1]
+        rows.append( list(index) + [ ref_exists, subst_exp_level ] )
+
+    df=pd.DataFrame(rows,columns=dimensions+['ref_marker','aux_exp_level'])
+
+    frames=[]
+    for index,frame in df.groupby(dimensions[:-1]):
+        substitute=frame.loc[ frame['ref_marker']==True,'exposure_level'].max()
+        frame['aux_exp_level']=frame['aux_exp_level'].where(frame['ref_marker'],substitute)
+        frames.append(frame)
+
+    return pd.concat(frames)
+
+def at_tile(group,marker_filter_map):
+    rows=[]
+    for tile_id,frame in group.groupby(['tile']):
+        aux_array = [ frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)].size
+                      for marker, filt in marker_filter_map
+                    ]
+
+        aux_array =np.array(aux_array)==0
+        missing_channel=np.any(aux_array)
+
+        if missing_channel:
+            channel_indices=np.argwhere(aux_array).flatten()
+            #Each missing (marker,filter) pair is separated by a colon ":", e.g. marker1,filter1:marker2,filter2
+            channel= ':'.join( [ ','.join(marker_filter_map[i]) for i in channel_indices] )
+        else:
+            channel=''
+
+        qc_data=[tile_id[0],missing_channel,channel,tile_id[0]]
+        cols=['tile','missing_ch','channels','aux_tile']
+        rows.append( qc_data )
+
+    df=pd.DataFrame(rows,columns=cols)
+    auxiliary_tile_index=df.loc[df['missing_ch']==False ][['tile']].values[0]
+    df.loc[ df['missing_ch']==True, ['aux_tile']]=auxiliary_tile_index
+
+    return df
+
+
+
+
+
+
+
+
+
diff --git a/macsima2mc/illumination_corr.py b/macsima2mc/illumination_corr.py
index 735b8eb..8863519 100644
--- a/macsima2mc/illumination_corr.py
+++ b/macsima2mc/illumination_corr.py
@@ -28,6 +28,13 @@ def extract_channel_imgs (stack,indices):

     return stack[indices,:,:]

+def filter_out_blanks(channel_tiles):
+    """Drop all-blank (all-zero) tiles so they do not bias the flat-field estimation."""
+    signal_indices=[index for index in range(0,channel_tiles.shape[0]) if np.sum(channel_tiles[index,:,:])>0 ]
+    filtered_tiles=channel_tiles[signal_indices,:,:]
+    return filtered_tiles
+
+
 def apply_corr(uncorr_stack,no_of_channels):

     """
@@ -46,7 +53,9 @@ def apply_corr(uncorr_stack,no_of_channels):

     for ind_list in indices:
         uncorr_imgs = extract_channel_imgs(uncorr_stack, ind_list)
-        basic.fit(uncorr_imgs)
+        imgs_for_ffp=filter_out_blanks(uncorr_imgs)
+        #basic.fit(uncorr_imgs)
+        basic.fit(imgs_for_ffp)
         ffp = basic.flatfield
         corr_stack[ind_list,:,:] = np.uint16(np.clip(uncorr_imgs.astype(float) / ffp, 0, 65535))

diff --git a/macsima2mc/macsima2mc.py b/macsima2mc/macsima2mc.py
index ae1160e..44d94fc 100644
--- a/macsima2mc/macsima2mc.py
+++ b/macsima2mc/macsima2mc.py
@@ -22,7 +22,8 @@ def main():

     # Create stack
     cycle_info = tools.append_metadata( cycle_info )
-    #cycle_info.to_csv( args.output / 'cycle_{c}_info.csv'.format(c=f'{6:03d}'), index=False )
+
+    #cycle_info.to_csv( args.output / 'cycle_{c}_info.csv'.format(c=f'{1:03d}'), index=False )
     output_dirs = tools.create_stack(cycle_info,
                                      output,
                                      ref_marker=ref,
diff --git a/macsima2mc/ome_schema.py b/macsima2mc/ome_schema.py
index 0ed8966..0ca499e 100644
--- a/macsima2mc/ome_schema.py
+++ b/macsima2mc/ome_schema.py
@@ -5,6 +5,7 @@ import platform



+
 def INPUTS(frame, conformed_markers):
     """
     This function creates a dictionary with the metadata of the tiles.
@@ -15,12 +16,14 @@ def INPUTS(frame, conformed_markers):
         dict: dictionary with the metadata of the tiles.
     """

-    features = frame.columns.values
-    inputs = {column:[] for column in features }
-    metadata = [frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker, filt in conformed_markers]
-    for meta in metadata:
-        for f in features:
-            inputs[f].append(meta[f].values[0])
+    #features = frame.columns.values
+    #inputs = {column:[] for column in features }
+    #metadata = [frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker, filt in conformed_markers]
+    #for meta in metadata:
+    #    for f in features:
+    #        inputs[f].append(meta[f].values[0])
+    inputs=frame.to_dict('list')
+
     return inputs


diff --git a/macsima2mc/tools.py b/macsima2mc/tools.py
index 9ec116d..5a0e3d7 100644
--- a/macsima2mc/tools.py
+++ b/macsima2mc/tools.py
@@ -1,5 +1,4 @@
 from tkinter.font import names
-
 from templates import info_dic
 import re
 import pandas as pd
@@ -8,6 +7,7 @@ import numpy as np
 from pathlib import Path
 import ome_writer
+import exceptions as expt
 import illumination_corr



@@ -15,7 +15,7 @@ def merge_dicts(list_of_dicts):
     """
     This function merges a list of dictionaries into a single dictionary where the values are stored in lists.
     Args:
-        list_of_dicts (list): list of dictionaries to merge
+        list_of_dicts (list): list of dictionaries with common keys
     Returns:
         merged_dict (dict): dictionary with the values stored in lists
     """
@@ -147,28 +147,8 @@ def conform_markers(mf_tuple,
     markers.insert(0,(ref_marker,ref_marker))
     return markers

-def any_ref(mf_tuple,
-            ref_marker='DAPI'):
-    """
-    This function checks if the reference marker is present in the mf_tuple.
-    Args:
-        mf_tuple (tuple): tuple with the markers and filters
-        ref_marker (str): reference marker used for registration
-    Returns:
-        bool: True if the reference marker is present in the mf_tuple, False otherwise.
-    """
-
-    exist_ref = False
-    for m in mf_tuple:
-        if m[0] == ref_marker:
-            exist_ref = True
-            break
-    return exist_ref
-
-
-def init_stack(ref_tile_index,
-               groupby_obj,
-               marker_filter_map):
+def init_stack(group,
+               no_of_channels):
     """
     This function initializes the stack array with the dimensions of the tiles.
     Args:
@@ -179,12 +159,15 @@
         np.ndarray: array with the dimensions of the stack array (depth, height, width) and the dtype of
         the reference tile.
     """
-    ref_tile = groupby_obj.get_group((ref_tile_index,))
-    total_tiles = len(groupby_obj)
-    width = ref_tile.size_x.values[0]
-    height = ref_tile.size_y.values[0]
-    depth = total_tiles*len(marker_filter_map)
-    stack = np.zeros( (depth,int(height),int(width)), dtype=ref_tile.type.values[0])
+    aux_array=[ group['size_x'].unique() , group['size_y'].unique(), group['type'].unique() ]
+    check_array=np.array( [ len(element) for element in aux_array ] )
+    if np.any(check_array>1):
+        print("Warning: tiles of this acquisition do not have a unique value for the following columns: xy-size or data type")
+    width, height, data_type = [ element[0] for element in aux_array ]
+    total_tiles = group['tile'].nunique()
+    depth = total_tiles * no_of_channels
+
+    stack = np.zeros( (depth,int(height),int(width)), dtype=data_type)

     return stack

@@ -257,7 +240,7 @@ def outputs_dic():


 def select_by_exposure(list_indices,
-                       exp_index=4,
+                       exp_index,
                        target='max'):
     """
     This function selects the indices with the maximum or minimum exposure time.
@@ -280,6 +263,45 @@ def select_by_exposure(list_indices,

     return selected_indices

+def append_reference(frame,frame_index,groups,aux_reference_exp_level,ref_marker_name):
+
+    aux_index = list(frame_index)
+    aux_index[-1] = aux_reference_exp_level
+    aux_index = tuple(aux_index)
+    aux_group = groups.get_group(aux_index)
+    aux_group = aux_group.loc[aux_group['marker']==ref_marker_name]
+    frame_ = pd.concat([frame, aux_group])
+    return frame_
+
+def append_missing_channels(group, exception_table ):
+    #exception_table.cols=["tile","missing_ch","channels","aux_tile"]
+    tiles=group.groupby(['tile'])
+    incomplete_tiles=exception_table.loc[exception_table['missing_ch']==True ]
+    add_tiles=[]
+    for row in incomplete_tiles.itertuples(index=False):
+        aux_tile_df=tiles.get_group( (row.aux_tile,) )
+        missing_markers=[ tuple(element.split(',')) for element in row.channels.split(':') ]
+        for marker, filt in missing_markers:
+            df=aux_tile_df.loc[ (aux_tile_df['marker']==marker) & (aux_tile_df['filter']==filt) ]
+            #df['tile']=row.tile
+            df.loc[:,'tile']=row.tile
+            add_tiles.append(df)
+
+    aux_group=pd.concat(add_tiles)
+    aux_group[ ['full_path','img_name'] ]='missing'
+    group_=pd.concat([group,aux_group])
+
+    return group_
+
+def conform_acquisition_group(group,conformed_markers):
+    aux=[]
+    for tile_id,frame in group.groupby(['tile']):
+        aux.extend([ frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker, filt in conformed_markers ])
+    group_conformed=pd.concat(aux)
+
+    return group_conformed
+
+

 def create_stack(cycle_info_df,
                  output_dir,
@@ -307,42 +329,56 @@ def create_stack(cycle_info_df,
     else:
         out = {'output_paths':[]}

-    acq_group = cycle_info_df.groupby(['source','rack','well','roi','exposure_level'])
+    #'exposure_level' should always be the last element of the dimensions list
+    dimensions=['source','rack','well','roi','exposure_level']
+
+    acq_group = cycle_info_df.groupby(dimensions)
     acq_index = list( acq_group.indices.keys() )
+    expt_matrix_roi=expt.at_roi(acq_group,dimensions,ref_marker).groupby(dimensions)#exceptions matrix

     if hi_exp:
-        acq_index = select_by_exposure(acq_index)
+        exp_level_index=np.argwhere( np.asarray(dimensions)=='exposure_level' ).flatten()[0]
+        acq_index = select_by_exposure(acq_index,exp_level_index,target='max')

     for index in acq_index:
         stack_output_dir = output_dir / cast_outdir_name(index) / out_folder
         stack_output_dir.mkdir(parents=True, exist_ok=True)
         group = acq_group.get_group(index)
-        #use tile 1 as reference to determine the height and width of the tiles
-        tile_no = group.tile.values
-        ref_tile = group.groupby(['tile']).get_group((tile_no[0],))
-        marker_filter_map = list(ref_tile.groupby(["marker","filter"]).indices.keys())
-        exist_ref = any_ref(marker_filter_map,ref_marker)
+        exist_ref, aux_reference= expt_matrix_roi.get_group(index )[ ['ref_marker','aux_exp_level'] ].iloc[0].values

         if not exist_ref:
-            index_aux = list(index)
-            index_aux[-1] = 1
-            index_aux = tuple(index_aux)
-            aux_group = acq_group.get_group(index_aux)
-            aux_group = aux_group.loc[aux_group['marker']==ref_marker]
-            group = pd.concat([group, aux_group])
-
-        #group.to_csv(stack_output_dir.parent.absolute() /'info.csv' )
-        groups_of_tiles = group.groupby(['tile'])
+            group=append_reference(group, index, acq_group, aux_reference, ref_marker)
+        #extract the list of unique (marker,filter) pairs
+        marker_filter_map=group[['marker','filter']].value_counts().index.values
+        #conform the (marker,filter) pairs so that the reference marker occupies the first position (1st channel) of the list
         conformed_markers = conform_markers(marker_filter_map, ref_marker)
-        stack = init_stack(tile_no[0], groups_of_tiles, conformed_markers)
-        ome = ome_writer.create_ome(group, conformed_markers)
+
+        expt_matrix_tiles=expt.at_tile(group,conformed_markers)
+        if expt_matrix_tiles['missing_ch'].any():
+            group=append_missing_channels(group,expt_matrix_tiles)
+
+        stack = init_stack(group, len( conformed_markers))
+        conformed_group=conform_acquisition_group(group,conformed_markers)
+        #ome = ome_writer.create_ome(group, conformed_markers)
+        ome = ome_writer.create_ome(conformed_group, conformed_markers)
         counter = 0
-
-        for tile_no, frame in groups_of_tiles:
-            for marker, filter in conformed_markers:
-                target_path = frame.loc[ (frame['marker']==marker) & (frame['filter']==filter) ].full_path.values[0]
-                stack[counter,:,:] = tifff.imread(Path(target_path))
+        groups_of_tiles = conformed_group.groupby(['tile'])
+        #for tile_no, frame in groups_of_tiles:
+        #    for marker, filter in conformed_markers:
+        #        target_path = frame.loc[ (frame['marker']==marker) & (frame['filter']==filter) ].full_path.values[0]
+        #        stack[counter,:,:] = tifff.imread(Path(target_path))
+        #        counter += 1
+        for tile_id,frame in groups_of_tiles:
+            for img_path in frame['full_path'].values:
+                try:
+                    stack[counter,:,:] = tifff.imread(Path(img_path))
+                except FileNotFoundError:
+                    if img_path!='missing':  #only the 'missing' placeholder set by append_missing_channels is tolerated; its slice stays as zeros
+                        raise
                 counter += 1
+
+
+
         stack_name = cast_stack_name(frame.cycle.iloc[0], index, conformed_markers)

         if ill_corr:
@@ -352,7 +388,7 @@
         else:
             tag = ''

-        stack_file_path = stack_output_dir/ f'{tag}{stack_name}'
+        stack_file_path = stack_output_dir / f'{tag}{stack_name}'

         if extended_outputs:
             out['index'].append(index)
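
Not part of the diff: a minimal sketch of how the new exceptions.at_tile helper flags tiles with missing channels, to make the QC table consumed by append_missing_channels concrete. The tile numbers, markers and filters below are illustrative only, and the snippet assumes macsima2mc/exceptions.py from this patch is on the import path and a pandas version in which groupby(['tile']) yields tuple keys, as the patched code expects.

import pandas as pd
import exceptions as expt  # macsima2mc/exceptions.py added by this patch

# toy cycle-info table: tile 2 lacks the (CD45, FITC) acquisition
cycle_info = pd.DataFrame({
    'tile':   [1, 1, 2],
    'marker': ['DAPI', 'CD45', 'DAPI'],
    'filter': ['DAPI', 'FITC', 'DAPI'],
})
conformed_markers = [('DAPI', 'DAPI'), ('CD45', 'FITC')]

qc = expt.at_tile(cycle_info, conformed_markers)
print(qc)
# tile 2 is reported with missing_ch=True, channels='CD45,FITC' and aux_tile=1,
# i.e. the complete tile whose rows append_missing_channels copies with full_path='missing'.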