Updates from Lutz on May 1, 2024
Andrei Fluerasu committed May 1, 2024
1 parent 56c64e0 commit fc45928
Showing 3 changed files with 201 additions and 51 deletions.
148 changes: 99 additions & 49 deletions pyCHX/chx_generic_functions.py
@@ -3135,73 +3135,123 @@ def get_full_data_path( uid ):
#print(p,p2)
return p + '_' + str(p2) + '_master.h5'



def get_sid_filenames(hdr,verbose=False):
    """
    get scan_id, uid and detector filename from databroker
    get_sid_filenames(hdr,verbose=False)
    hdr = db[uid]
    returns (scan_id, uid, filepaths)
    LW 04/30/2024
    """
import glob
from time import strftime, localtime
start_doc = hdr.start
stop_doc = hdr.stop
success = False

ret = (start_doc["scan_id"], start_doc["uid"], glob.glob(f"{start_doc['data path']}*_{start_doc['sequence id']}_master.h5")) # looking for (eiger) datafile at the path specified in metadata
if len(ret[2])==0:
if verbose: print('could not find detector filename from "data_path" in metadata: %s'%start_doc['data path'])
else:
if verbose: print('Found detector filename from "data_path" in metadata!');success=True

if not success: # looking at path in metadata, but taking the date from the run start document
data_path=start_doc['data path'][:-11]+strftime("%Y/%m/%d/",localtime(start_doc['time']))
ret = (start_doc["scan_id"], start_doc["uid"], glob.glob(f"{data_path}*_{start_doc['sequence id']}_master.h5"))
if len(ret[2])==0:
if verbose: print('could not find detector filename in %s'%data_path)
else:
if verbose: print('Found detector filename in %s'%data_path);success=True

if not success: # looking at path in metadata, but taking the date from the run stop document (in case the date rolled over between creating the start doc and staging the detector)
data_path=start_doc['data path'][:-11]+strftime("%Y/%m/%d/",localtime(stop_doc['time']))
ret = (start_doc["scan_id"], start_doc["uid"], glob.glob(f"{data_path}*_{start_doc['sequence id']}_master.h5"))
if len(ret[2])==0:
if verbose: print('Sorry, could not find detector filename....')
else:
if verbose: print('Found detector filename in %s'%data_path);success=True
return ret
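
A minimal usage sketch (assumption: db is the CHX databroker instance and the uid is illustrative, not part of this commit):

# hypothetical usage -- assumes a databroker instance `db` and a valid run uid
hdr = db['a1b2c3']  # illustrative uid
sid, uid, filepaths = get_sid_filenames(hdr, verbose=True)
print(sid, uid, filepaths)  # filepaths: list of matching *_master.h5 files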


# def get_sid_filenames(header):
# """YG. Dev Jan, 2016
# Get a bluesky scan_id, unique_id, filename by giving uid

# Parameters
# ----------
# header: a header of a bluesky scan, e.g. db[-1]

# Returns
# -------
# scan_id: integer
# unique_id: string, a full string of a uid
# filename: string

# Usage:
# sid,uid, filenames = get_sid_filenames(db[uid])

# """
# from collections import defaultdict
# from glob import glob
# from pathlib import Path

# filepaths = []
# resources = {} # uid: document
# datums = defaultdict(list) # uid: List(document)
# for name, doc in header.documents():
# if name == "resource":
# resources[doc["uid"]] = doc
# elif name == "datum":
# datums[doc["resource"]].append(doc)
# elif name == "datum_page":
# for datum in event_model.unpack_datum_page(doc):
# datums[datum["resource"]].append(datum)
# for resource_uid, resource in resources.items():
# file_prefix = Path(resource.get('root', '/'), resource["resource_path"])
# if 'eiger' not in resource['spec'].lower():
# continue
# for datum in datums[resource_uid]:
# dm_kw = datum["datum_kwargs"]
# seq_id = dm_kw['seq_id']
# new_filepaths = glob(f'{file_prefix!s}_{seq_id}*')
# filepaths.extend(new_filepaths)
# return header.start['scan_id'], header.start['uid'], filepaths

def load_dask_data(uid,detector,mask_path_full,reverse=False,rot90=False):
"""
load data as dask-array
get image md (direct beam, wavelength, sample-detector distance,...) from databroker documents (no need to read an actual image)
get pixel_mask and binary_mask from static location (getting it from image metadata takes forever in some conda envs...)
    load_dask_data(uid,detector,mask_path_full,reverse=False,rot90=False)
uid: uid (str)
detector: md['detector']
mask_path_full: current standard would be _mask_path_+'pixel_masks/'
returns detector_images(dask-array), image_md
LW 04/26/2024
"""
import dask
hdr=db[uid]
det=detector.split('_image')[0]
    # collect image metadata from loading single image
img_md_dict={'detector_distance':'det_distance','incident_wavelength':'wavelength','frame_time':'cam_acquire_period','count_time':'cam_acquire_time','num_images':'cam_num_images','beam_center_x':'beam_center_x','beam_center_y':'beam_center_y'}
img_md={}
for k in list(img_md_dict.keys()):
img_md[k]=hdr.config_data(det)['primary'][0]['%s_%s'%(det,img_md_dict[k])]
    if md['detector'] in ['eiger4m_single_image','eiger1m_single_image','eiger500K_single_image']:
        img_md.update({'y_pixel_size': 7.5e-05, 'x_pixel_size': 7.5e-05})
        got_pixel_mask=True
    else:
        img_md.update({'y_pixel_size': None, 'x_pixel_size': None})
        got_pixel_mask=False
# load pixel mask from static location
if got_pixel_mask:
        json_open=open(mask_path_full+'pixel_mask_compression_%s.json'%detector.split('_')[0])
mask_dict=json.load(json_open)
img_md['pixel_mask']=np.array(mask_dict['pixel_mask'])
img_md['binary_mask']=np.array(mask_dict['binary_mask'])
del mask_dict

    # load image data as dask-array:
dimg=hdr.xarray_dask()[md['detector']][0]
if reverse:
dimg=dask.array.flip(dimg,axis=(0,1))
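
The diff is truncated here; the remaining lines presumably apply rot90 and return the data. A hedged usage sketch, assuming the md metadata dictionary and _mask_path_ root that CHX analysis notebooks define:

# hypothetical usage -- `md` and `_mask_path_` as defined in a CHX analysis notebook
dimg, img_md = load_dask_data(uid, md['detector'], _mask_path_ + 'pixel_masks/')
print(img_md['num_images'], img_md['incident_wavelength'])
frame0 = np.array(dimg[0])  # dask-backed: pixel data is read only on materialization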
98 changes: 98 additions & 0 deletions pyCHX/chx_outlier_detection.py
@@ -0,0 +1,98 @@
import numpy as np
import matplotlib.pyplot as plt
import skbeam.core.roi as roi  # assumed import, matching pyCHX conventions (roi.roi_pixel_values is used below)


def is_outlier(points, thresh=3.5, verbose=False):
    """MAD test: flag outliers via the median absolute deviation
    returns a boolean array, True where the modified z-score exceeds thresh
    """
    points = np.asarray(points)  # accept lists as well as arrays
    if len(points) == 1:
        points = points[:, None]
        if verbose:
            print('input to is_outlier is a single point...')
    median = np.median(points) * np.ones(np.shape(points))
    diff = np.sqrt((points - median) ** 2)  # absolute deviation from the median
    med_abs_deviation = np.median(diff)
    modified_z_score = 0.6745 * diff / med_abs_deviation
    return modified_z_score > thresh
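
A quick sanity check of the MAD test on synthetic data (values are illustrative only):

# illustrative check: a single extreme value is flagged
data = np.array([1.0, 1.1, 0.9, 1.05, 0.95, 12.0])
print(is_outlier(data, thresh=3.5))
# -> [False False False False False  True]
# for 12.0: MAD = 0.075, modified z-score = 0.6745*10.975/0.075 ~ 98.7 > 3.5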

def outlier_mask(avg_img,mask,roi_mask,outlier_threshold = 7.5,maximum_outlier_fraction = .1,verbose=False,plot=False):
"""
outlier_mask(avg_img,mask,roi_mask,outlier_threshold = 7.5,maximum_outlier_fraction = .1,verbose=False,plot=False)
avg_img: average image data (2D)
mask: 2D array, same size as avg_img with pixels that are already masked
roi_mask: 2D array, same size as avg_img, ROI labels 'encoded' as mask values (i.e. all pixels belonging to ROI 5 have the value 5)
outlier_threshold: threshold for MAD test
    maximum_outlier_fraction: maximum fraction of pixels in an ROI that can be classified as outliers; if the detected fraction is higher, no outliers will be masked for that ROI
    verbose: 'True' enables message output
    plot: 'True' enables visualization of outliers
    returns: mask (dtype=float): 0 for pixels that have been classified as outliers, 1 otherwise
dependency: is_outlier()
function does outlier detection for each ROI separately based on pixel intensity in avg_img*mask and ROI specified by roi_mask, using the median-absolute-deviation (MAD) method
by LW 06/21/2023
"""
hhmask = np.ones(np.shape(roi_mask))
pc=1

for rn in np.arange(1,np.max(roi_mask)+1,1):
rm=np.zeros(np.shape(roi_mask));rm=rm-1;rm[np.where( roi_mask == rn)]=1
pixel = roi.roi_pixel_values(avg_img*rm, roi_mask, [rn] )
        out_l = is_outlier((avg_img*mask*rm)[rm>-1], thresh=outlier_threshold)
        upper_outlier_threshold = False; lower_outlier_threshold = False # initialize both, avoids a NameError for ROIs without detected outliers
if np.nanmax(out_l)>0: # Did detect at least one outlier
ave_roi_int = np.nanmean((pixel[0][0])[out_l<1])
if verbose: print('ROI #%s\naverage ROI intensity: %s'%(rn,ave_roi_int))
try:
upper_outlier_threshold = np.nanmin((out_l*pixel[0][0])[out_l*pixel[0][0]>ave_roi_int])
if verbose: print('upper outlier threshold: %s'%upper_outlier_threshold)
            except Exception:
upper_outlier_threshold = False
if verbose: print('no upper outlier threshold found')
ind1 = (out_l*pixel[0][0])>0; ind2 = (out_l*pixel[0][0])< ave_roi_int
try:
lower_outlier_threshold = np.nanmax((out_l*pixel[0][0])[ind1*ind2])
            except Exception:
lower_outlier_threshold = False
if verbose: print('no lower outlier threshold found')
else:
if verbose: print('ROI #%s: no outliers detected'%rn)

### MAKE SURE we don't REMOVE more than x percent of the pixels in the roi
outlier_fraction = np.sum(out_l)/len(pixel[0][0])
if verbose: print('fraction of pixel values detected as outliers: %s'%np.round(outlier_fraction,2))
if outlier_fraction > maximum_outlier_fraction:
            if verbose: print('fraction of pixel values detected as outliers > maximum fraction %s allowed -> NOT masking outliers...check threshold for MAD and maximum fraction of outliers allowed'%maximum_outlier_fraction)
upper_outlier_threshold = False; lower_outlier_threshold = False

if upper_outlier_threshold:
hhmask[avg_img*rm > upper_outlier_threshold] = 0
if lower_outlier_threshold:
hhmask[avg_img*rm < lower_outlier_threshold] = 0

if plot:
if pc == 1: fig,ax = plt.subplots(1,5,figsize=(24,4))
plt.subplot(1,5,pc);pc+=1;
if pc>5: pc=1
pixel = roi.roi_pixel_values(avg_img*rm*mask, roi_mask, [rn] )
plt.plot( pixel[0][0] ,'bo',markersize=1.5 )
if upper_outlier_threshold or lower_outlier_threshold:
x=np.arange(len(out_l))
plt.plot([x[0],x[-1]],[ave_roi_int,ave_roi_int],'g--',label='ROI average: %s'%np.round(ave_roi_int,4))
if upper_outlier_threshold:
ind=(out_l*pixel[0][0])> upper_outlier_threshold
plt.plot(x[ind],(out_l*pixel[0][0])[ind],'r+')
plt.plot([x[0],x[-1]],[upper_outlier_threshold,upper_outlier_threshold],'r--',label='upper thresh.: %s'%np.round(upper_outlier_threshold,4))
if lower_outlier_threshold:
ind=(out_l*pixel[0][0])< lower_outlier_threshold
plt.plot(x[ind],(out_l*pixel[0][0])[ind],'r+')
                plt.plot([x[0],x[-1]],[lower_outlier_threshold,lower_outlier_threshold],'r--',label='lower thresh.: %s'%np.round(lower_outlier_threshold,4))
plt.ylabel('Intensity') ;plt.xlabel('pixel');plt.title('ROI #: %s'%rn);plt.legend(loc='best',fontsize=8)

if plot:
fig,ax = plt.subplots()
plt.imshow(hhmask)
hot_dark=np.nonzero(hhmask<1)
cmap = plt.cm.get_cmap('viridis')
plt.plot(hot_dark[1],hot_dark[0],'+',color=cmap(0))
plt.xlabel('pixel');plt.ylabel('pixel');plt.title('masked pixels with outlier threshold: %s'%outlier_threshold)

return hhmask
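
A hedged usage sketch; avg_img, mask and roi_mask stand for whatever the existing pyCHX analysis setup provides (the names are illustrative, not part of this commit):

# hypothetical usage -- inputs from an existing pyCHX analysis session
new_mask = outlier_mask(avg_img, mask, roi_mask,
                        outlier_threshold=7.5,
                        maximum_outlier_fraction=0.1,
                        verbose=True, plot=True)
mask = mask * new_mask  # fold flagged outlier pixels into the analysis mask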
6 changes: 4 additions & 2 deletions pyCHX/chx_xpcs_xsvs_jupyter_V1.py
@@ -3,9 +3,11 @@
#from pyCHX.chx_generic_functions import get_short_long_labels_from_qval_dict
#RUN_GUI = False
#from pyCHX.chx_libs import markers
from IPython import get_ipython
import pandas as pds
# temporary fix: get_data() uses the deprecated np.float and gets imported from pyCHX/chx_correlationc.py -> clobber the function with a temporary fix:
ip = get_ipython()
ip.run_line_magic("run", "/nsls2/data/chx/shared/CHX_Software/packages/environment_management/chx_analysis_setup.ipynb")
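
For reference: ip.run_line_magic("run", path) is the programmatic equivalent of the %run cell magic; unlike a bare %run line, which is a SyntaxError in a plain .py module, it survives import and executes when the module is loaded inside an IPython session.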

def get_t_iqc_uids( uid_list, setup_pargs, slice_num= 10, slice_width= 1):
    '''Get Iq at different time edges (defined by slice_num and slice_width) for a list of uids
