Some changes to how extreme IFARs are dealt with (gwastro#4479)
* Move conservative addition into count_n_louder function

* fix

* Separate the +1 or not decision out from the get_n_louder calculation

* cc

* remove testing change

* fix docstrings

* TD comment

* TD suggestion in other places

* TD suggestion

* CC

* Update pycbc/events/significance.py

Co-authored-by: Thomas Dent <[email protected]>

* Allow limits on FAR to be set

* missed import

* double thinko

* some docstring fixes

* fix edge case where template_duration can be calculated as zero

* remove testing changes

* far limit was not being passed through properly

* Update bin/all_sky_search/pycbc_coinc_statmap

* Update bin/all_sky_search/pycbc_coinc_statmap

* Update bin/all_sky_search/pycbc_coinc_statmap

* allow different IFAR limits for different IFO combinations

* Applying limits with incorrect units applies them incorrectly

* allow upper and lower limits on IFAR in foundmissed plots

* CC complaints and removing unused flag from functions

* CCv2

* Some improvements to comments, remove changes to pycbc_apply_rerank

* Actually remove apply_rerank

* TD comment

* Update pycbc/events/significance.py

Co-authored-by: Thomas Dent <[email protected]>

* Don't log if not applying a FAR limit

* missed update

* undo mistaken change

---------

Co-authored-by: Thomas Dent <[email protected]>
GarethCabournDavies and tdent authored Sep 18, 2023
1 parent 2be81bd commit 524b384
Showing 9 changed files with 346 additions and 147 deletions.
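
The diffs below all converge on one calling pattern: significance.get_far returns false-alarm rates directly, with the conservative '+ 1' on the louder-background count handled inside the significance module (per the commit notes above), and significance.apply_far_limit can then clamp those rates before they are turned into IFAR and FAP. As a reference point, here is a minimal numpy sketch of the arithmetic involved; it is an illustration with made-up numbers, not the pycbc.events.significance implementation.

import numpy as np

def sketch_far(back_stat, fore_stat, dec_factor, background_time):
    # Decimation-weighted count of background triggers louder than each
    # foreground trigger (the 'n_louder' of significance.get_n_louder)
    n_louder = np.array([dec_factor[back_stat > s].sum() for s in fore_stat])
    # Conservative '+ 1' so a candidate louder than all background still
    # gets a finite FAR; this PR moves that choice into the significance
    # module instead of leaving it to each calling script
    return (n_louder + 1) / background_time     # FAR in Hz

# Made-up numbers, purely to show how the quantities relate
back_stat = np.array([3.1, 4.0, 5.2, 6.0])
dec_factor = np.ones_like(back_stat)
fore_stat = np.array([4.5, 7.0])
background_time = 1.0e7                         # seconds of background livetime
foreground_time = 3.0e6                         # seconds of foreground livetime

far = sketch_far(back_stat, fore_stat, dec_factor, background_time)
ifar = 1.0 / far                                # seconds
fap = 1.0 - np.exp(-foreground_time * far)      # same form as in the scripts below
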
76 changes: 42 additions & 34 deletions bin/all_sky_search/pycbc_add_statmap
@@ -160,14 +160,15 @@ if not injection_style:
if k not in all_ifos:
pycbc.io.combine_and_copy(f, files, 'background_exc/' + k)

# create dataset of ifo combination strings
fg_coinc_type = np.array([])
for f_in in files:
key = get_ifo_string(f_in).replace(' ','')
combo_repeat = np.array(np.repeat(key.encode('utf8'),
f_in['foreground/stat'].size))
fg_coinc_type = np.concatenate([fg_coinc_type, combo_repeat])

if args.output_coinc_types:
# create dataset of ifo combination strings
fg_coinc_type = np.array([])
for f_in in files:
key = get_ifo_string(f_in).replace(' ','')
combo_repeat = np.array(np.repeat(key.encode('utf8'),
f_in['foreground/fap'].size))
fg_coinc_type = np.concatenate([fg_coinc_type, combo_repeat])
f['foreground/ifo_combination'] = fg_coinc_type

logging.info('Collating triggers into single structure')
@@ -271,6 +272,7 @@ for key in f['foreground'].keys():
for k in f['foreground/%s' % key].keys():
filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

fg_coinc_type = fg_coinc_type[cidx]
n_triggers = f['foreground/stat'].size

logging.info('Calculating event times to determine which types of coinc '
@@ -301,49 +303,47 @@ far = {}
far_exc = {}
bg_time = {}
bg_time_exc = {}
fnlouder = {}
fnlouder_exc = {}

if injection_style:
# if background files are provided, this is being used for injections
# use provided background files to calculate the FARs
for bg_fname in args.background_files:
bg_f = h5py.File(bg_fname, 'r')
ifo_combo_key = bg_f.attrs['ifos'].replace(' ','')
_, fnlouder[ifo_combo_key] = \
significance.get_n_louder(
_, far[ifo_combo_key] = significance.get_far(
bg_f['background/stat'][:],
f['foreground/stat'][:],
bg_f['background/decimation_factor'][:],
bg_f.attrs['background_time'],
**significance_dict[ifo_combo_key])

far[ifo_combo_key] = (fnlouder[ifo_combo_key] + 1) / bg_f.attrs['background_time']
_, fnlouder_exc[ifo_combo_key] = \
significance.get_n_louder(
_, far_exc[ifo_combo_key] = \
significance.get_far(
bg_f['background_exc/stat'][:],
f['foreground/stat'][:],
bg_f['background_exc/decimation_factor'][:],
bg_f.attrs['background_time_exc'],
**significance_dict[ifo_combo_key])
far_exc[ifo_combo_key] = (fnlouder_exc[ifo_combo_key] + 1) / bg_f.attrs['background_time_exc']
else:
# if not injection style input files, then the input files will have the
# background included
for f_in in files:
ifo_combo_key = get_ifo_string(f_in).replace(' ','')
_, fnlouder[ifo_combo_key] = \
significance.get_n_louder(
_, far[ifo_combo_key] = \
significance.get_far(
f_in['background/stat'][:],
f['foreground/stat'][:],
f_in['background/decimation_factor'][:],
f_in.attrs['background_time'],
**significance_dict[ifo_combo_key])
far[ifo_combo_key] = (fnlouder[ifo_combo_key] + 1) / f_in.attrs['background_time']
_, fnlouder_exc[ifo_combo_key] = \
significance.get_n_louder(

_, far_exc[ifo_combo_key] = \
significance.get_far(
f_in['background_exc/stat'][:],
f['foreground/stat'][:],
f_in['background_exc/decimation_factor'][:],
f_in.attrs['background_time_exc'],
**significance_dict[ifo_combo_key])
far_exc[ifo_combo_key] = (fnlouder_exc[ifo_combo_key] + 1) / f_in.attrs['background_time_exc']

logging.info('Combining false alarm rates from all available backgrounds')

@@ -357,8 +357,15 @@ fg_fars = np.array([list(far[ct]) for ct in all_ifo_combos])
fg_fars_exc = np.array([list(far_exc[ct]) for ct in all_ifo_combos])

# Combine the FARs with the mask to obtain the new ifars
fg_ifar = conv.sec_to_year(1. / np.sum(isincombo_mask * fg_fars, axis=0))
fg_ifar_exc = conv.sec_to_year(1. / np.sum(isincombo_mask * fg_fars_exc, axis=0))
fg_fars_out = np.sum(isincombo_mask * fg_fars, axis=0)
fg_fars_exc_out = np.sum(isincombo_mask * fg_fars_exc, axis=0)

# Apply any limits as appropriate
fg_fars_out = significance.apply_far_limit(fg_fars_out, significance_dict, combo=fg_coinc_type)
fg_fars_exc_out = significance.apply_far_limit(fg_fars_exc_out, significance_dict, combo=fg_coinc_type)

fg_ifar = conv.sec_to_year(1. / fg_fars_out)
fg_ifar_exc = conv.sec_to_year(1. / fg_fars_exc_out)
fg_time = f.attrs['foreground_time']
del isincombo_mask, fg_fars, fg_fars_exc, _

@@ -415,7 +422,7 @@ bg_coinc_type = np.array([])
for f_in in files:
key = get_ifo_string(f_in).replace(' ','')
combo_repeat_fg = np.array(np.repeat(key.encode('utf8'),
f_in['foreground/fap'].size))
f_in['foreground/stat'].size))
fg_coinc_type = np.concatenate([fg_coinc_type, combo_repeat_fg])
combo_repeat_bg = np.array(np.repeat(key.encode('utf8'),
f_in['background/stat'].size))
@@ -587,29 +594,30 @@ while True:
test_times = np.array([pycbc.events.mean_if_greater_than_zero(tc)[0]
for tc in zip(*times_tuple)])
for key in all_ifo_combos:
bnlouder, fnlouder = significance.get_n_louder(
sep_bg_data[key].data['stat'],
sep_fg_data[key].data['stat'],
sep_bg_data[key].data['decimation_factor'],
**significance_dict[ifo_combo_key])
# In principle, bg time should be adjusted, but is expected to be a
# negligible correction
fg_time_ct[key] -= args.cluster_window
bg_t_y = conv.sec_to_year(bg_time_ct[key])
fg_t_y = conv.sec_to_year(fg_time_ct[key])
sep_bg_data[key].data['ifar'] = bg_t_y / (bnlouder + 1)
sep_fg_data[key].data['ifar'] = bg_t_y / (fnlouder + 1)
bg_far, fg_far = significance.get_far(
sep_bg_data[key].data['stat'],
sep_fg_data[key].data['stat'],
sep_bg_data[key].data['decimation_factor'],
bg_t_y,
**significance_dict[ifo_combo_key])
sep_bg_data[key].data['ifar'] = 1. / bg_far
sep_fg_data[key].data['ifar'] = 1. / fg_far
sep_fg_data[key].data['fap'] = 1 - \
np.exp(-fg_t_y / sep_fg_data[key].data['ifar'])
np.exp(-fg_t_y * fg_far)

logging.info("Recalculating combined IFARs")
for key in all_ifo_combos:
bnlouder, fnlouder = significance.get_n_louder(
_, far[key] = significance.get_far(
sep_bg_data[key].data['stat'],
combined_fg_data.data['stat'],
sep_bg_data[key].data['decimation_factor'],
bg_time_ct[key],
**significance_dict[ifo_combo_key])
far[key] = (fnlouder + 1) / bg_time_ct[key]
# Set up variable for whether each coincidence is available in each coincidence time
is_in_combo_time[key] = np.zeros(n_triggers)
end_times = np.array(f['segments/%s/end' % key][:])
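Before moving on: the apply_far_limit calls above take the summed FAR array, the per-combination significance settings, and the array of ifo-combination labels (fg_coinc_type, whose computation is therefore moved out of the args.output_coinc_types branch and filtered alongside the other foreground datasets). The following toy stand-in shows the intended effect under the assumption, suggested by the 'extreme IFARs' motivation, that the limit acts as a per-combination floor on the FAR, i.e. a cap on the reported IFAR; the dict layout and function name are illustrative, not the pycbc API.

import numpy as np

def sketch_apply_far_limit(far, far_floor_by_combo, combo):
    # Hypothetical stand-in for significance.apply_far_limit: raise any FAR
    # below its combination's floor up to that floor, so IFAR = 1 / FAR
    # cannot blow up when a candidate is louder than all background
    far = np.array(far, dtype=float)
    combo = np.asarray(combo)
    for key, floor in far_floor_by_combo.items():
        mask = combo == key
        far[mask] = np.maximum(far[mask], floor)
    return far

fars = np.array([1.0e-13, 3.0e-9, 2.0e-7])           # Hz, made-up values
combos = np.array([b'H1L1', b'H1L1', b'H1L1V1'])
floors = {b'H1L1': 1.0e-10, b'H1L1V1': 3.0e-10}      # per-combination FAR floors
print(sketch_apply_far_limit(fars, floors, combos))  # [1e-10, 3e-9, 2e-7]
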
66 changes: 37 additions & 29 deletions bin/all_sky_search/pycbc_coinc_statmap
@@ -241,23 +241,29 @@ fore_stat = all_trigs.stat[fore_locs]

# Cumulative array of inclusive background triggers and the number of
# inclusive background triggers louder than each foreground trigger
back_cnum, fnlouder = significance.get_n_louder(
bg_far, fg_far = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
background_time,
**significance_dict[ifo_combo])

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
back_cnum_exc, fnlouder_exc = significance.get_n_louder(
bg_far_exc, fg_far_exc = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
background_time_exc,
**significance_dict[ifo_combo])

f['background/ifar'] = conv.sec_to_year(background_time / (back_cnum + 1))
f['background_exc/ifar'] = conv.sec_to_year(background_time_exc /
(back_cnum_exc + 1))
fg_far = significance.apply_far_limit(fg_far, significance_dict, combo=ifo_combo)
bg_far = significance.apply_far_limit(bg_far, significance_dict, combo=ifo_combo)
fg_far_exc = significance.apply_far_limit(fg_far_exc, significance_dict, combo=ifo_combo)
bg_far_exc = significance.apply_far_limit(bg_far_exc, significance_dict, combo=ifo_combo)

f['background/ifar'] = conv.sec_to_year(1. / bg_far)
f['background_exc/ifar'] = conv.sec_to_year(1. / bg_far_exc)
f.attrs['background_time'] = background_time
f.attrs['foreground_time'] = coinc_time
f.attrs['background_time_exc'] = background_time_exc
@@ -266,11 +272,11 @@ f.attrs['foreground_time_exc'] = coinc_time_exc
logging.info("calculating ifar/fap values")

if fore_locs.sum() > 0:
ifar = background_time / (fnlouder + 1)
ifar = 1. / fg_far
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground/ifar'] = conv.sec_to_year(ifar)
f['foreground/fap'] = fap
ifar_exc = background_time_exc / (fnlouder_exc + 1)
ifar_exc = 1. / fg_far_exc
fap_exc = 1 - numpy.exp(- coinc_time_exc / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
Expand All @@ -295,28 +301,27 @@ h_iterations = 0

orig_fore_stat = fore_stat

# Assign a new variable to keep track of whether we care about fnlouder or
# fnlouder_exc. Whether we want to remove hierarchically against inclusive
# Assign a new variable to keep track of whether we care about ifar or
# ifar_exc. Whether we want to remove hierarchically against inclusive
# or exclusive background.
if args.max_hierarchical_removal != 0:
# If user wants to remove against inclusive background.
if args.hierarchical_removal_against == 'inclusive':
louder_foreground = fnlouder
ifar_foreground = ifar
# Otherwise user wants to remove against exclusive background
else :
louder_foreground = fnlouder_exc
ifar_foreground = ifar_exc
else :
# It doesn't matter if you choose louder_foreground = fnlouder
# or vice versa, the while loop below will break if
# louder_foreground doesn't contain 0, or at the comparison
# h_iterations == args.max_hierarchical_removal. But this avoids
# a NameError
louder_foreground = fnlouder
# It doesn't matter if you choose ifar_foreground = ifar
# or ifar_exc, the while loop below will break
# straight away. But this avoids a NameError
ifar_foreground = ifar

# Step 2 : Loop until we don't have to hierarchically remove anymore. This
# will happen when fnlouder has no elements that equal 0.
# will happen when ifar_foreground has no elements greater than
# the background time.

while numpy.any(louder_foreground == 0):
while numpy.any(ifar_foreground >= background_time):
# If the user wants to stop doing hierarchical removals after a set
# number of iterations then break when that happens.
if (h_iterations == args.max_hierarchical_removal):
@@ -326,7 +331,7 @@ while numpy.any(louder_foreground == 0):
# downstream codes.
if h_iterations == 0:
f['background_h%s/stat' % h_iterations] = back_stat
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(background_time / (back_cnum + 1))
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(1. / bg_far)
f['background_h%s/timeslide_id' % h_iterations] = all_trigs.data['timeslide_id'][back_locs]
f['foreground_h%s/stat' % h_iterations] = fore_stat
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
@@ -408,38 +413,41 @@ while numpy.any(louder_foreground == 0):
logging.info("Calculating FAN from background statistic values")
back_stat = all_trigs.stat[back_locs]
fore_stat = all_trigs.stat[fore_locs]
back_cnum, fnlouder = significance.get_n_louder(
bg_far, fg_far = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
background_time,
return_counts=True,
**significance_dict[ifo_combo])

# Update the louder_foreground criteria depending on whether foreground
# Update the ifar_foreground criteria depending on whether foreground
# triggers are being removed via inclusive or exclusive background.
if args.hierarchical_removal_against == 'inclusive':
louder_foreground = fnlouder
ifar_foreground = 1. / fg_far

# Exclusive background doesn't change when removing foreground triggers.
# So we don't have to take back_cnum_exc, just repopulate fnlouder_exc
# So we don't have to take background ifar, just repopulate ifar_foreground
else :
_, fnlouder_exc = significance.get_n_louder(
_, fg_far_exc = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
background_time_exc,
**significance_dict[ifo_combo])
louder_foreground = fnlouder_exc
# louder_foreground has been updated and the code can continue.
ifar_foreground = 1. / fg_far_exc
# ifar_foreground has been updated and the code can continue.

logging.info("Calculating ifar/fap values")
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(background_time / (back_cnum + 1))
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(1. / bg_far)
f.attrs['background_time_h%s' % h_iterations] = background_time
f.attrs['foreground_time_h%s' % h_iterations] = coinc_time

if fore_locs.sum() > 0:
# Write ranking statistic to file just for downstream plotting code
f['foreground_h%s/stat' % h_iterations] = fore_stat

ifar = background_time / (fnlouder + 1)
ifar = 1. / fg_far
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
f['foreground_h%s/fap' % h_iterations] = fap
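The hierarchical-removal loop in this file now compares foreground IFARs against the background time instead of looking for triggers with no louder background. With the conservative FAR of (n_louder + 1) / background_time the two criteria select the same triggers; a quick numpy check (illustrative only, not part of the codebase):

import numpy as np

background_time = 1.0e7                  # seconds, made-up value
n_louder = np.array([0, 0, 3, 17])       # louder-background counts
ifar = background_time / (n_louder + 1)  # old-style conservative IFAR, seconds

# ifar >= background_time  <=>  n_louder + 1 <= 1  <=>  n_louder == 0
assert np.array_equal(ifar >= background_time, n_louder == 0)
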
7 changes: 5 additions & 2 deletions bin/all_sky_search/pycbc_coinc_statmap_inj
@@ -83,13 +83,16 @@ f.attrs['foreground_time'] = coinc_time

if len(zdata) > 0:

_, fnlouder_exc = significance.get_n_louder(
_, fg_far_exc = significance.get_far(
back_stat,
zdata.stat,
dec_fac,
background_time,
**significance_dict[ifo_key])

ifar_exc = background_time / (fnlouder_exc + 1)
fg_far_exc = significance.apply_far_limit(fg_far_exc, significance_dict, combo=ifo_key)

ifar_exc = 1. / fg_far_exc
fap_exc = 1 - numpy.exp(- coinc_time / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
21 changes: 12 additions & 9 deletions bin/all_sky_search/pycbc_exclude_zerolag
@@ -94,21 +94,24 @@ for k in filtered_trigs.data:
f_out['background_exc/%s' % k] = filtered_trigs.data[k]

logging.info('Recalculating IFARs')
bnlouder, fnlouder = significance.get_n_louder(
bg_far, fg_far = significance.get_far(
filtered_trigs.data['stat'],
f_in['foreground/stat'][:],
filtered_trigs.data['decimation_factor'],
f_in.attrs['background_time_exc'],
**significance_dict[all_ifo_key])
# In principle exc background time should be recalculated
# However we expect exclude zerolag to be a (very) small correction
fg_time_exc = conv.sec_to_year(f_in.attrs['foreground_time_exc'])
bg_time_exc = conv.sec_to_year(f_in.attrs['background_time_exc'])
fg_ifar_exc = bg_time_exc / (fnlouder + 1)
bg_ifar_exc = bg_time_exc / (bnlouder + 1)

fg_far = significance.apply_far_limit(fg_far, significance_dict, combo=all_ifo_key)
bg_far = significance.apply_far_limit(bg_far, significance_dict, combo=all_ifo_key)

fg_ifar_exc = 1. / fg_far
bg_ifar_exc = 1. / bg_far

logging.info('Writing updated ifars to file')
f_out['foreground/ifar_exc'][:] = fg_ifar_exc
f_out['foreground/ifar_exc'][:] = conv.sec_to_year(fg_ifar_exc)
f_out['background_exc/ifar'][:] = conv.sec_to_year(bg_ifar_exc)

fg_time_exc = conv.sec_to_year(f_in.attrs['foreground_time_exc'])
f_out['foreground/fap_exc'][:] = 1 - np.exp(-fg_time_exc / fg_ifar_exc)
f_out['background_exc/ifar'][:] = bg_ifar_exc

logging.info("Done!")
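
One of the squashed commits above ('Applying limits with incorrect units applies them incorrectly') shows up in these hunks: FARs and the IFARs derived from them are handled in seconds and only converted with conv.sec_to_year when written to file, so any configured limit has to be expressed in the same units. A small reminder of that conversion, with illustrative numbers (conv here stands for pycbc.conversions, which provides sec_to_year):

from pycbc import conversions as conv

ifar_seconds = 3.15e9                      # an IFAR of roughly 100 years, in seconds
ifar_years = conv.sec_to_year(ifar_seconds)
far_per_year = 1.0 / ifar_years            # limits must be applied in matching units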