Updates from Lutz on May 1, 2024
Andrei Fluerasu committed May 1, 2024
1 parent 56c64e0 commit fc45928
Showing 3 changed files with 201 additions and 51 deletions.
148 changes: 99 additions & 49 deletions pyCHX/chx_generic_functions.py
@@ -3135,73 +3135,123 @@ def get_full_data_path( uid ):
#print(p,p2)
return p + '_' + str(p2) + '_master.h5'



def get_sid_filenames(hdr,verbose=False):
    """
    get scan_id, uid and detector filename from databroker
    get_sid_filenames(hdr,verbose=False)
    hdr = db[uid]
    returns (scan_id, uid, filepaths)
    LW 04/30/2024
    """
import glob
from time import strftime, localtime
start_doc = hdr.start
stop_doc = hdr.stop
success = False

ret = (start_doc["scan_id"], start_doc["uid"], glob.glob(f"{start_doc['data path']}*_{start_doc['sequence id']}_master.h5")) # looking for (eiger) datafile at the path specified in metadata
if len(ret[2])==0:
if verbose: print('could not find detector filename from "data_path" in metadata: %s'%start_doc['data path'])
else:
if verbose: print('Found detector filename from "data_path" in metadata!');success=True

if not success: # looking at path in metadata, but taking the date from the run start document
data_path=start_doc['data path'][:-11]+strftime("%Y/%m/%d/",localtime(start_doc['time']))
ret = (start_doc["scan_id"], start_doc["uid"], glob.glob(f"{data_path}*_{start_doc['sequence id']}_master.h5"))
if len(ret[2])==0:
if verbose: print('could not find detector filename in %s'%data_path)
else:
if verbose: print('Found detector filename in %s'%data_path);success=True

if not success: # looking at path in metadata, but taking the date from the run stop document (in case the date rolled over between creating the start doc and staging the detector)
data_path=start_doc['data path'][:-11]+strftime("%Y/%m/%d/",localtime(stop_doc['time']))
ret = (start_doc["scan_id"], start_doc["uid"], glob.glob(f"{data_path}*_{start_doc['sequence id']}_master.h5"))
if len(ret[2])==0:
if verbose: print('Sorry, could not find detector filename....')
else:
if verbose: print('Found detector filename in %s'%data_path);success=True
return ret
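
A minimal usage sketch (assumption: db is the CHX databroker instance and the uid is illustrative, not part of this commit):

# hypothetical usage -- assumes a databroker instance `db` and a valid run uid
hdr = db['a1b2c3']  # illustrative uid
sid, uid, filepaths = get_sid_filenames(hdr, verbose=True)
print(sid, uid, filepaths)  # filepaths: list of matching *_master.h5 files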


# def get_sid_filenames(header):
# """YG. Dev Jan, 2016
# Get a bluesky scan_id, unique_id, filename by giving uid

# Parameters
# ----------
# header: a header of a bluesky scan, e.g. db[-1]

# Returns
# -------
# scan_id: integer
# unique_id: string, a full string of a uid
# filename: string

# Usage:
# sid,uid, filenames = get_sid_filenames(db[uid])

# """
# from collections import defaultdict
# from glob import glob
# from pathlib import Path

# filepaths = []
# resources = {} # uid: document
# datums = defaultdict(list) # uid: List(document)
# for name, doc in header.documents():
# if name == "resource":
# resources[doc["uid"]] = doc
# elif name == "datum":
# datums[doc["resource"]].append(doc)
# elif name == "datum_page":
# for datum in event_model.unpack_datum_page(doc):
# datums[datum["resource"]].append(datum)
# for resource_uid, resource in resources.items():
# file_prefix = Path(resource.get('root', '/'), resource["resource_path"])
# if 'eiger' not in resource['spec'].lower():
# continue
# for datum in datums[resource_uid]:
# dm_kw = datum["datum_kwargs"]
# seq_id = dm_kw['seq_id']
# new_filepaths = glob(f'{file_prefix!s}_{seq_id}*')
# filepaths.extend(new_filepaths)
# return header.start['scan_id'], header.start['uid'], filepaths

def load_dask_data(uid,detector,mask_path_full,reverse=False,rot90=False):
"""
load data as dask-array
get image md (direct beam, wavelength, sample-detector distance,...) from databroker documents (no need to read an actual image)
get pixel_mask and binary_mask from static location (getting it from image metadata takes forever in some conda envs...)
    load_dask_data(uid,detector,mask_path_full,reverse=False,rot90=False)
uid: uid (str)
detector: md['detector']
mask_path_full: current standard would be _mask_path_+'pixel_masks/'
returns detector_images(dask-array), image_md
LW 04/26/2024
"""
import dask
hdr=db[uid]
det=detector.split('_image')[0]
    # collect image metadata from loading single image
img_md_dict={'detector_distance':'det_distance','incident_wavelength':'wavelength','frame_time':'cam_acquire_period','count_time':'cam_acquire_time','num_images':'cam_num_images','beam_center_x':'beam_center_x','beam_center_y':'beam_center_y'}
img_md={}
for k in list(img_md_dict.keys()):
img_md[k]=hdr.config_data(det)['primary'][0]['%s_%s'%(det,img_md_dict[k])]
    if md['detector'] in ['eiger4m_single_image','eiger1m_single_image','eiger500K_single_image']:
        img_md.update({'y_pixel_size': 7.5e-05, 'x_pixel_size': 7.5e-05})
        got_pixel_mask=True
    else:
        img_md.update({'y_pixel_size': None, 'x_pixel_size': None})
        got_pixel_mask=False
# load pixel mask from static location
if got_pixel_mask:
        json_open=open(mask_path_full+'pixel_mask_compression_%s.json'%detector.split('_')[0])
mask_dict=json.load(json_open)
img_md['pixel_mask']=np.array(mask_dict['pixel_mask'])
img_md['binary_mask']=np.array(mask_dict['binary_mask'])
del mask_dict

    # load image data as dask-array:
dimg=hdr.xarray_dask()[md['detector']][0]
if reverse:
dimg=dask.array.flip(dimg,axis=(0,1))
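
The diff is truncated here; the remaining lines presumably apply rot90 and return the data. A hedged usage sketch, assuming the md metadata dictionary and _mask_path_ root that CHX analysis notebooks define:

# hypothetical usage -- `md` and `_mask_path_` as defined in a CHX analysis notebook
dimg, img_md = load_dask_data(uid, md['detector'], _mask_path_ + 'pixel_masks/')
print(img_md['num_images'], img_md['incident_wavelength'])
frame0 = np.array(dimg[0])  # dask-backed: pixel data is read only on materialization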
98 changes: 98 additions & 0 deletions pyCHX/chx_outlier_detection.py
@@ -0,0 +1,98 @@
import numpy as np
import matplotlib.pyplot as plt
import skbeam.core.roi as roi  # assumed import, matching pyCHX conventions (roi.roi_pixel_values is used below)


def is_outlier(points, thresh=3.5, verbose=False):
    """MAD test: flag outliers via the median absolute deviation
    returns a boolean array, True where the modified z-score exceeds thresh
    """
    points = np.asarray(points)  # accept lists as well as arrays
    if len(points) == 1:
        points = points[:, None]
        if verbose:
            print('input to is_outlier is a single point...')
    median = np.median(points) * np.ones(np.shape(points))
    diff = np.sqrt((points - median) ** 2)  # absolute deviation from the median
    med_abs_deviation = np.median(diff)
    modified_z_score = 0.6745 * diff / med_abs_deviation
    return modified_z_score > thresh
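
A quick sanity check of the MAD test on synthetic data (values are illustrative only):

# illustrative check: a single extreme value is flagged
data = np.array([1.0, 1.1, 0.9, 1.05, 0.95, 12.0])
print(is_outlier(data, thresh=3.5))
# -> [False False False False False  True]
# for 12.0: MAD = 0.075, modified z-score = 0.6745*10.975/0.075 ~ 98.7 > 3.5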

def outlier_mask(avg_img,mask,roi_mask,outlier_threshold = 7.5,maximum_outlier_fraction = .1,verbose=False,plot=False):
"""
outlier_mask(avg_img,mask,roi_mask,outlier_threshold = 7.5,maximum_outlier_fraction = .1,verbose=False,plot=False)
avg_img: average image data (2D)
mask: 2D array, same size as avg_img with pixels that are already masked
roi_mask: 2D array, same size as avg_img, ROI labels 'encoded' as mask values (i.e. all pixels belonging to ROI 5 have the value 5)
outlier_threshold: threshold for MAD test
    maximum_outlier_fraction: maximum fraction of pixels in an ROI that can be classified as outliers; if the detected fraction is higher, no outliers will be masked for that ROI
    verbose: 'True' enables message output
    plot: 'True' enables visualization of outliers
    returns: mask (dtype=float): 0 for pixels that have been classified as outliers, 1 otherwise
dependency: is_outlier()
function does outlier detection for each ROI separately based on pixel intensity in avg_img*mask and ROI specified by roi_mask, using the median-absolute-deviation (MAD) method
by LW 06/21/2023
"""
hhmask = np.ones(np.shape(roi_mask))
pc=1

for rn in np.arange(1,np.max(roi_mask)+1,1):
rm=np.zeros(np.shape(roi_mask));rm=rm-1;rm[np.where( roi_mask == rn)]=1
pixel = roi.roi_pixel_values(avg_img*rm, roi_mask, [rn] )
        out_l = is_outlier((avg_img*mask*rm)[rm>-1], thresh=outlier_threshold)
        upper_outlier_threshold = False; lower_outlier_threshold = False # initialize both, avoids a NameError for ROIs without detected outliers
if np.nanmax(out_l)>0: # Did detect at least one outlier
ave_roi_int = np.nanmean((pixel[0][0])[out_l<1])
if verbose: print('ROI #%s\naverage ROI intensity: %s'%(rn,ave_roi_int))
try:
upper_outlier_threshold = np.nanmin((out_l*pixel[0][0])[out_l*pixel[0][0]>ave_roi_int])
if verbose: print('upper outlier threshold: %s'%upper_outlier_threshold)
            except Exception:
upper_outlier_threshold = False
if verbose: print('no upper outlier threshold found')
ind1 = (out_l*pixel[0][0])>0; ind2 = (out_l*pixel[0][0])< ave_roi_int
try:
lower_outlier_threshold = np.nanmax((out_l*pixel[0][0])[ind1*ind2])
            except Exception:
lower_outlier_threshold = False
if verbose: print('no lower outlier threshold found')
else:
if verbose: print('ROI #%s: no outliers detected'%rn)

### MAKE SURE we don't REMOVE more than x percent of the pixels in the roi
outlier_fraction = np.sum(out_l)/len(pixel[0][0])
if verbose: print('fraction of pixel values detected as outliers: %s'%np.round(outlier_fraction,2))
if outlier_fraction > maximum_outlier_fraction:
            if verbose: print('fraction of pixel values detected as outliers > maximum fraction %s allowed -> NOT masking outliers...check threshold for MAD and maximum fraction of outliers allowed'%maximum_outlier_fraction)
upper_outlier_threshold = False; lower_outlier_threshold = False

if upper_outlier_threshold:
hhmask[avg_img*rm > upper_outlier_threshold] = 0
if lower_outlier_threshold:
hhmask[avg_img*rm < lower_outlier_threshold] = 0

if plot:
if pc == 1: fig,ax = plt.subplots(1,5,figsize=(24,4))
plt.subplot(1,5,pc);pc+=1;
if pc>5: pc=1
pixel = roi.roi_pixel_values(avg_img*rm*mask, roi_mask, [rn] )
plt.plot( pixel[0][0] ,'bo',markersize=1.5 )
if upper_outlier_threshold or lower_outlier_threshold:
x=np.arange(len(out_l))
plt.plot([x[0],x[-1]],[ave_roi_int,ave_roi_int],'g--',label='ROI average: %s'%np.round(ave_roi_int,4))
if upper_outlier_threshold:
ind=(out_l*pixel[0][0])> upper_outlier_threshold
plt.plot(x[ind],(out_l*pixel[0][0])[ind],'r+')
plt.plot([x[0],x[-1]],[upper_outlier_threshold,upper_outlier_threshold],'r--',label='upper thresh.: %s'%np.round(upper_outlier_threshold,4))
if lower_outlier_threshold:
ind=(out_l*pixel[0][0])< lower_outlier_threshold
plt.plot(x[ind],(out_l*pixel[0][0])[ind],'r+')
                plt.plot([x[0],x[-1]],[lower_outlier_threshold,lower_outlier_threshold],'r--',label='lower thresh.: %s'%np.round(lower_outlier_threshold,4))
plt.ylabel('Intensity') ;plt.xlabel('pixel');plt.title('ROI #: %s'%rn);plt.legend(loc='best',fontsize=8)

if plot:
fig,ax = plt.subplots()
plt.imshow(hhmask)
hot_dark=np.nonzero(hhmask<1)
cmap = plt.cm.get_cmap('viridis')
plt.plot(hot_dark[1],hot_dark[0],'+',color=cmap(0))
plt.xlabel('pixel');plt.ylabel('pixel');plt.title('masked pixels with outlier threshold: %s'%outlier_threshold)

return hhmask
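
A hedged usage sketch; avg_img, mask and roi_mask stand for whatever the existing pyCHX analysis setup provides (the names are illustrative, not part of this commit):

# hypothetical usage -- inputs from an existing pyCHX analysis session
new_mask = outlier_mask(avg_img, mask, roi_mask,
                        outlier_threshold=7.5,
                        maximum_outlier_fraction=0.1,
                        verbose=True, plot=True)
mask = mask * new_mask  # fold flagged outlier pixels into the analysis mask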
6 changes: 4 additions & 2 deletions pyCHX/chx_xpcs_xsvs_jupyter_V1.py
@@ -3,9 +3,11 @@
#from pyCHX.chx_generic_functions import get_short_long_labels_from_qval_dict
#RUN_GUI = False
#from pyCHX.chx_libs import markers
from IPython import get_ipython
import pandas as pds
# temporary fix: get_data() uses the deprecated np.float and gets imported from pyCHX/chx_correlationc.py -> clobber the function with a temporary fix:
ip = get_ipython()
ip.run_line_magic("run", "/nsls2/data/chx/shared/CHX_Software/packages/environment_management/chx_analysis_setup.ipynb")
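
For reference: ip.run_line_magic("run", path) is the programmatic equivalent of the %run cell magic; unlike a bare %run line, which is a SyntaxError in a plain .py module, it survives import and executes when the module is loaded inside an IPython session.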

def get_t_iqc_uids( uid_list, setup_pargs, slice_num= 10, slice_width= 1):
    '''Get Iq at different time edges (defined by slice_num and slice_width) for a list of uids
