Skip to content

Commit

Permalink
Rewritten matching events to enable event building efficiency study (#84
Browse files Browse the repository at this point in the history
)

* move to development

* put fv cut as an option now in load_peaks

* removed useless plugins

* deprecated old event matching

* annotation

* redefined matching

* fixed typo

* inner _touching_windows bug

* deprecated load_events

* draft for load events refurbished
  • Loading branch information
FaroutYLq authored Mar 27, 2024
1 parent c47da32 commit 1ebbc9a
Show file tree
Hide file tree
Showing 3 changed files with 221 additions and 66 deletions.
2 changes: 1 addition & 1 deletion jobs/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ max_num_submit = 200
t_sleep = 1

[job]
container = xenonnt-2024.02.1.simg
container = xenonnt-development.simg
# SE bootstrapped
#runids = 47876,51907,47860,51905,51904,51906,51909,51911,51913,51939,47865,51910,49079,49077,53139,49432,52997,48693,48699,48692,49080,48698,49433,49028,53153,53167
# AmBe
Expand Down
198 changes: 140 additions & 58 deletions saltax/match/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import strax


def filter_out_missing_s1_s2(truth, match):
def _filter_out_missing_s1_s2(truth, match):
"""
Filter out simulated events that have no S1 or S2 or both
:param truth: truth from wfsim
Expand Down Expand Up @@ -34,7 +34,7 @@ def filter_out_missing_s1_s2(truth, match):
return truth[~bad_mask], match[~bad_mask]


def filter_out_multiple_s1_s2(truth, match):
def _filter_out_multiple_s1_s2(truth, match):
"""
Filter out simulated events that have multiple S1 or S2 reconstructed from wfsim.
:param truth: truth from wfsim
Expand All @@ -58,7 +58,7 @@ def filter_out_multiple_s1_s2(truth, match):
return truth[~bad_mask], match[~bad_mask]


def filter_out_not_found(truth, match, s1s2=1):
def _filter_out_not_found(truth, match, s1s2=1):
"""
Filter out simulated events whose S1 are not found by pema.
:param truth: truth from wfsim
Expand Down Expand Up @@ -88,7 +88,7 @@ def filter_out_not_found(truth, match, s1s2=1):
return truth[~bad_mask], match[~bad_mask]


def pair_events_to_filtered_truth(truth, events):
def _pair_events_to_filtered_truth(truth, events):
"""
Pair events to filtered truth.
:param truth: filtered truth
Expand Down Expand Up @@ -130,7 +130,7 @@ def pair_events_to_filtered_truth(truth, events):
return matched_to


def pair_events_to_matched_simu(matched_simu, events):
def _pair_events_to_matched_simu(matched_simu, events):
"""
Pair salted events to matched simulation events.
:param matched_simu: simulation events already matched to truth
Expand Down Expand Up @@ -160,31 +160,7 @@ def pair_events_to_matched_simu(matched_simu, events):
return matched_to


def pair_peaks_to_matched_simu(matched_simu, peaks, safeguard=0):
"""
Pair salted peaks to simulation peaks who have been matched to truth.
:param matched_simu: simulation peaks already matched to truth
:param peaks: peaks from saltax after reconstruction
:param safeguard: extension of time range to consider as matched, as a workaround for timing problem in wfsim s2
:return: ind_simu_matched_to_salt: the index of matched simulation peaks for each salted peak
:return: ind_salt_matched_to_simu: the index of matched salted peaks for each simulation peak
"""
windows = strax.touching_windows(peaks,
matched_simu,
window=safeguard)
windows_length = windows[:,1] - windows[:,0]
simu_matched_to_salt_mask = windows_length == 1
print("Filter out %s percent of simulation due to multiple or no matched sprinkled \
peaks"%(np.sum(~simu_matched_to_salt_mask)/len(matched_simu)*100))

matched_simu = matched_simu[simu_matched_to_salt_mask]
ind_simu_matched_to_salt = np.where(simu_matched_to_salt_mask)[0]
ind_salt_matched_to_simu = windows[simu_matched_to_salt_mask][:,0]

return ind_simu_matched_to_salt, ind_salt_matched_to_simu


def pair_salt_to_simu_events(truth, match, events_simu, events_salt):
def _pair_salt_to_simu_events(truth, match, events_simu, events_salt):
"""
Filter out bad simulation truth and then pair salted events to matched simulation events.
:param truth: filtered truth
Expand All @@ -193,42 +169,23 @@ def pair_salt_to_simu_events(truth, match, events_simu, events_salt):
:param events_salt: events from saltax after reconstruction
:return: ind_salt_matched_to_simu, ind_simu_matched_to_truth, truth_filtered, match_filtered
"""
truth_filtered, match_filtered = filter_out_missing_s1_s2(truth, match)
truth_filtered, match_filtered = filter_out_multiple_s1_s2(truth_filtered, match_filtered)
truth_filtered, match_filtered = filter_out_not_found(truth_filtered, match_filtered)
truth_filtered, match_filtered = _filter_out_missing_s1_s2(truth, match)
truth_filtered, match_filtered = _filter_out_multiple_s1_s2(truth_filtered, match_filtered)
truth_filtered, match_filtered = _filter_out_not_found(truth_filtered, match_filtered)

ind_simu_matched_to_truth = pair_events_to_filtered_truth(truth_filtered, events_simu)
ind_simu_matched_to_truth = _pair_events_to_filtered_truth(truth_filtered, events_simu)
events_simu_matched_to_truth = events_simu[ind_simu_matched_to_truth[ind_simu_matched_to_truth>=0]]

ind_salt_matched_to_simu = pair_events_to_matched_simu(events_simu_matched_to_truth,
ind_salt_matched_to_simu = _pair_events_to_matched_simu(events_simu_matched_to_truth,
events_salt)

return ind_salt_matched_to_simu, ind_simu_matched_to_truth, truth_filtered, match_filtered


def pair_salt_to_simu_peaks(truth, match, peaks_simu, peaks_salt):
"""
Filter out bad simulation truth and then pair salted events to matched simulation events.
:param truth: filtered truth
:param match: match_acceptance_extended from pema
:param peaks_simu: peaks from wfsim
:param peaks_salt: peaks from saltax after reconstruction
:return: ind_salt_matched_to_simu, ind_simu_matched_to_truth
"""
ind_simu_matched_to_truth = match['matched_to']
peaks_simu_matched_to_truth = peaks_simu[ind_simu_matched_to_truth]

(_ind_simu_matched_to_truth,
ind_salt_matched_to_simu) = pair_peaks_to_matched_simu(peaks_simu_matched_to_truth,
peaks_salt)
ind_simu_matched_to_truth = ind_simu_matched_to_truth[_ind_simu_matched_to_truth]

return ind_salt_matched_to_simu, ind_simu_matched_to_truth


def match_events(truth, match, events_simu, events_salt):
def match_events_deprecated(truth, match, events_simu, events_salt):
"""
Match salted events to simulation events.
Match salted events to simulation events. This function has been deprecated because it cannot track
event building efficiency in a proper way.
:param truth: truth from wfsim
:param match: match_acceptance_extended from pema
:param events_simu: event_info from wfsim
Expand All @@ -237,14 +194,95 @@ def match_events(truth, match, events_simu, events_salt):
"""
ind_salt_matched_to_simu, \
ind_simu_matched_to_truth, \
_, _ = pair_salt_to_simu_events(truth, match, events_simu, events_salt)
_, _ = _pair_salt_to_simu_events(truth, match, events_simu, events_salt)

events_simu_matched_to_truth = events_simu[ind_simu_matched_to_truth[ind_simu_matched_to_truth>=0]]
events_salt_matched_to_simu = events_salt[ind_salt_matched_to_simu[ind_salt_matched_to_simu>=0]]
events_simu_matched_to_salt = events_simu_matched_to_truth[ind_salt_matched_to_simu>=0]

return events_salt_matched_to_simu, events_simu_matched_to_salt


def match_events(events_simu, events_salt):
"""
Match salted events to simulation events based on event or S1-S2 timing, without checking
simulation truth.
The procedures are:
1. Filter out events_simu which is missing S1. Then the rest events_simu is like 'truth'
without ambience interference. Those events_simu are called events_simu_filtered.
2. Find indices of events_salt whose time range overlaps with events_simu_filtered:
ind_salt_event_found. Those events_salt[ind_salt_event_found] are called
events_salt_event_found.
3. Find indcies of events_simu_filtered whose time range overlaps with events_salt_event_found:
ind_simu_event_found. Those events_simu_filtered[ind_simu_event_found] are called
events_simu_event_found.
4. Find indcies of events_salt whose S1 time range overlaps with events_simu_filtered's S1 time
range: ind_salt_s1_found. Those events_salt[ind_salt_s1_found] are called events_salt_s1_found.
5. Find indcies of events_simu_filtered whose S1 time range overlaps with events_salt_s1_found:
ind_simu_s1_found. Those events_simu_filtered[ind_simu_s1_found] are called events_simu_s1_found.
6. Find indcies of events_salt whose S2 time range overlaps with events_simu_filtered's S2 time
range: ind_salt_s2_found. Those events_salt[ind_salt_s2_found] are called events_salt_s2_found.
7. Find indcies of events_simu_filtered whose S2 time range overlaps with events_salt_s2_found:
ind_simu_s2_found. Those events_simu_filtered[ind_simu_s2_found] are called events_simu_s2_found.
:param events_simu: event_info from wfsim
:param events_salt: event_info from saltax
:return: events_simu_filtered,
ind_salt_event_found, ind_simu_event_found, ind_simu_event_lost, ind_simu_event_split,
ind_salt_s1_found, ind_simu_s1_found,
ind_salt_s2_found, ind_simu_s2_found
"""
# Step 1.
# Filter out events_simu which is missing S1
events_simu_filtered = events_simu[events_simu['s1_time']>=0]

# Step 2 & 3.
# Find indices of events_salt whose time range overlaps with events_simu_filtered
event_touching_windows = strax.touching_windows(events_salt, events_simu_filtered)
event_windows_length = event_touching_windows[:,1] - event_touching_windows[:,0]
# If windows_length is not 1, the sprinkled event is either split or not found
mask_simu_event_found = event_windows_length == 1
mask_simu_event_lost = event_windows_length == 0
mask_simu_event_split = event_windows_length >= 2
ind_simu_event_found = np.where(mask_simu_event_found)[0]
ind_simu_event_lost = np.where(mask_simu_event_lost)[0]
ind_simu_event_split = np.where(mask_simu_event_split)[0]
# Find indcies of events_salt whose event time range overlaps with events_simu_filtered
ind_salt_event_found = event_touching_windows[mask_simu_event_found][:,0]

# Step 4 & 5.
# Assumed s1 times has been sorted! It should be a safe assumption because their events are sorted.
s1_touching_windows = strax.processing.general._touching_windows(
events_salt["s1_time"],
events_salt["s1_endtime"],
events_simu_filtered["s1_time"],
events_simu_filtered["s1_endtime"],
)
s1_windows_length = s1_touching_windows[:,1] - s1_touching_windows[:,0]
mask_simu_s1_found = s1_windows_length == 1
ind_simu_s1_found = np.where(mask_simu_s1_found)[0]
ind_salt_s1_found = s1_touching_windows[mask_simu_s1_found][:,0]

# Step 6 & 7.
# Assumed s2 times has been sorted! It should be a safe assumption because their events are sorted.
s2_touching_windows = strax.processing.general._touching_windows(
events_salt["s2_time"],
events_salt["s2_endtime"],
events_simu_filtered["s2_time"],
events_simu_filtered["s2_endtime"],
)
s2_windows_length = s2_touching_windows[:,1] - s2_touching_windows[:,0]
mask_simu_s2_found = s2_windows_length == 1
ind_simu_s2_found = np.where(mask_simu_s2_found)[0]
ind_salt_s2_found = s2_touching_windows[mask_simu_s2_found][:,0]

return (
events_simu_filtered,
ind_salt_event_found, ind_simu_event_found, ind_simu_event_lost, ind_simu_event_split,
ind_salt_s1_found, ind_simu_s1_found,
ind_salt_s2_found, ind_simu_s2_found
)


def match_peaks(truth, match, peaks_simu, peaks_salt):
"""
Match salted peaks to simulation peaks.
Expand All @@ -267,3 +305,47 @@ def match_peaks(truth, match, peaks_simu, peaks_salt):
peaks_simu_matched_to_salt = peaks_simu_matched_to_truth[ind_salt_matched_to_simu>=0]

return peaks_salt_matched_to_simu, peaks_simu_matched_to_salt


def pair_salt_to_simu_peaks(truth, match, peaks_simu, peaks_salt):
"""
Filter out bad simulation truth and then pair salted events to matched simulation events.
:param truth: filtered truth
:param match: match_acceptance_extended from pema
:param peaks_simu: peaks from wfsim
:param peaks_salt: peaks from saltax after reconstruction
:return: ind_salt_matched_to_simu, ind_simu_matched_to_truth
"""
ind_simu_matched_to_truth = match['matched_to']
peaks_simu_matched_to_truth = peaks_simu[ind_simu_matched_to_truth]

(_ind_simu_matched_to_truth,
ind_salt_matched_to_simu) = pair_peaks_to_matched_simu(peaks_simu_matched_to_truth,
peaks_salt)
ind_simu_matched_to_truth = ind_simu_matched_to_truth[_ind_simu_matched_to_truth]

return ind_salt_matched_to_simu, ind_simu_matched_to_truth


def pair_peaks_to_matched_simu(matched_simu, peaks, safeguard=0):
"""
Pair salted peaks to simulation peaks who have been matched to truth.
:param matched_simu: simulation peaks already matched to truth
:param peaks: peaks from saltax after reconstruction
:param safeguard: extension of time range to consider as matched, as a workaround for timing problem in wfsim s2
:return: ind_simu_matched_to_salt: the index of matched simulation peaks for each salted peak
:return: ind_salt_matched_to_simu: the index of matched salted peaks for each simulation peak
"""
windows = strax.touching_windows(peaks,
matched_simu,
window=safeguard)
windows_length = windows[:,1] - windows[:,0]
simu_matched_to_salt_mask = windows_length == 1
print("Filter out %s percent of simulation due to multiple or no matched sprinkled \
peaks"%(np.sum(~simu_matched_to_salt_mask)/len(matched_simu)*100))

matched_simu = matched_simu[simu_matched_to_salt_mask]
ind_simu_matched_to_salt = np.where(simu_matched_to_salt_mask)[0]
ind_salt_matched_to_simu = windows[simu_matched_to_salt_mask][:,0]

return ind_simu_matched_to_salt, ind_salt_matched_to_simu
87 changes: 80 additions & 7 deletions saltax/match/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,16 @@
'cut_cs2_area_fraction_top',])


def load_peaks(runs, st_salt, st_simu, plugins=('peak_basics', 'peak_positions',
'peak_shadow', 'peak_ambience',
'cut_se_peaks')):
def load_peaks(runs, st_salt, st_simu,
plugins=('peak_basics', 'peak_positions_mlp'),
truth_fv_cut=True):
"""
Load peaks from the runs and do basic filtering suggeted by saltax.match_peaks
:param runs: list of runs.
:param st_salt: saltax context for salt mode
:param st_simu: saltax context for simu mode
:param plugins: plugins to be loaded, default to ('peak_basics', 'peak_positions_mlp')
:param truth_fv_cut: whether to apply truth FV cut, default to True
:return: peaks_simu: peaks related plugins from simulated dataset
:return: peaks_salt: peaks related plugins from sprinkled dataset
:return: truth: truth information in simulation
Expand All @@ -86,10 +88,14 @@ def load_peaks(runs, st_salt, st_simu, plugins=('peak_basics', 'peak_positions',
match_i = st_simu.get_array(run, 'match_acceptance_extended', progress_bar=False)

# TODO: Ugly hardcoding for FV cut, need to be fixed
peaks_salt_matched_to_simu_i, \
peaks_simu_matched_to_salt_i = saltax.match_peaks(truth_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)],
match_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)],
peaks_simu_i, peaks_salt_i)
if truth_fv_cut:
peaks_salt_matched_to_simu_i, \
peaks_simu_matched_to_salt_i = saltax.match_peaks(truth_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)],
match_i[(truth_i['z']<-13)&(truth_i['z']>-145)&(truth_i['x']**2+truth_i['y']**2<64**2)],
peaks_simu_i, peaks_salt_i)
else:
peaks_salt_matched_to_simu_i, \
peaks_simu_matched_to_salt_i = saltax.match_peaks(truth_i, match_i, peaks_simu_i, peaks_salt_i)

if i==0:
peaks_simu = peaks_simu_i
Expand All @@ -112,6 +118,73 @@ def load_peaks(runs, st_salt, st_simu, plugins=('peak_basics', 'peak_positions',


def load_events(runs, st_salt, st_simu, plugins=('event_info', 'cuts_basic')):
"""
Load events from the runs and do basic filtering suggeted by saltax.match_events
:param runs: list of runs.
:param st_salt: saltax context for salt mode
:param st_simu: saltax context for simu mode
:return: events_simu: events from simulated dataset, filtered out those who miss S1
:return: events_salt: events from sprinkled dataset
:return inds_dict: dictionary of indices of events from sprinkled or filtered simulated dataset,
regarding matching events or s1 or s2
"""
# Initialize the dictionary to store the indices
inds_dict = {
"ind_salt_event_found": np.array([], dtype=np.int32),
"ind_salt_s1_found": np.array([], dtype=np.int32),
"ind_salt_s2_found": np.array([], dtype=np.int32),
"ind_simu_event_found": np.array([], dtype=np.int32),
"ind_simu_s1_found": np.array([], dtype=np.int32),
"ind_simu_s2_found": np.array([], dtype=np.int32)
}

for i, run in enumerate(runs):
print("Loading run %s"%(run))

# Load plugins for both salt and simu
events_simu_i = st_simu.get_array(run, plugins, progress_bar=False)
events_salt_i = st_salt.get_array(run, plugins, progress_bar=False)

# Get matching result
(
events_simu_filtered_i,
ind_salt_event_found_i, ind_simu_event_found_i, ind_simu_event_lost_i, ind_simu_event_split_i,
ind_salt_s1_found_i, ind_simu_s1_found_i,
ind_salt_s2_found_i, ind_simu_s2_found_i
) = saltax.match_events(events_simu_i, events_salt_i)

# Load the indices into the dictionary
inds_dict["ind_salt_event_found"] = np.concatenate(
(inds_dict["ind_salt_event_found"], ind_salt_event_found_i)
)
inds_dict["ind_salt_s1_found"] = np.concatenate(
(inds_dict["ind_salt_s1_found"], ind_salt_s1_found_i)
)
inds_dict["ind_salt_s2_found"] = np.concatenate(
(inds_dict["ind_salt_s2_found"], ind_salt_s2_found_i)
)
inds_dict["ind_simu_event_found"] = np.concatenate(
(inds_dict["ind_simu_event_found"], ind_simu_event_found_i)
)
inds_dict["ind_simu_s1_found"] = np.concatenate(
(inds_dict["ind_simu_s1_found"], ind_simu_s1_found_i)
)
inds_dict["ind_simu_s2_found"] = np.concatenate(
(inds_dict["ind_simu_s2_found"], ind_simu_s2_found_i)
)

# Concatenate the events
if i==0:
events_simu = events_simu_filtered_i
events_salt = events_salt_i
else:
events_simu = np.concatenate((events_simu, events_simu_filtered_i))
events_salt = np.concatenate((events_salt, events_salt_i))

return events_simu, events_salt, inds_dict


def load_events_deprecated(runs, st_salt, st_simu, plugins=('event_info', 'cuts_basic')):
"""
Load events from the runs and do basic filtering suggeted by saltax.match_events
:param runs: list of runs.
Expand Down

0 comments on commit 1ebbc9a

Please sign in to comment.