Some changes to how extreme IFARs are dealt with (gwastro#4479)
* Move conservative addition into count_n_louder function

* fix

* Separate the +1 or not decision out from the get_n_louder calculation

* cc

* remove testing change

* fix docstrings

* TD comment

* TD suggestion in other places

* TD suggestion

* CC

* Update pycbc/events/significance.py

Co-authored-by: Thomas Dent <[email protected]>

* Allow limits on FAR to be set

* missed import

* double thinko

* some docstring fixes

* fix edge case where template_duration can be calculated as zero

* remove testing changes

* far limit was not being passed through properly

* Update bin/all_sky_search/pycbc_coinc_statmap

* Update bin/all_sky_search/pycbc_coinc_statmap

* Update bin/all_sky_search/pycbc_coinc_statmap

* allow different IFAR limits for different IFO combinations

* Applying limits with incorrect units applies them incorrectly

* allow upper and lower limits on IFAR in foundmissed plots

* CC complaints and removing unused flag from functions

* CCv2

* Some improvements to comments, remove changes to pycbc_apply_rerank

* Actually remove apply_rerank

* TD comment

* Update pycbc/events/significance.py

Co-authored-by: Thomas Dent <[email protected]>

* Don't log if not applying a FAR limit

* missed update

* undo mistaken change

---------

Co-authored-by: Thomas Dent <[email protected]>
GarethCabournDavies and tdent authored Sep 18, 2023
1 parent 2be81bd commit 524b384
Showing 9 changed files with 346 additions and 147 deletions.
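
The diffs below all converge on one calling pattern: significance.get_far returns false-alarm rates directly, with the conservative '+ 1' on the louder-background count handled inside the significance module (per the commit notes above), and significance.apply_far_limit can then clamp those rates before they are turned into IFAR and FAP. As a reference point, here is a minimal numpy sketch of the arithmetic involved; it is an illustration with made-up numbers, not the pycbc.events.significance implementation.

import numpy as np

def sketch_far(back_stat, fore_stat, dec_factor, background_time):
    # Decimation-weighted count of background triggers louder than each
    # foreground trigger (the 'n_louder' of significance.get_n_louder)
    n_louder = np.array([dec_factor[back_stat > s].sum() for s in fore_stat])
    # Conservative '+ 1' so a candidate louder than all background still
    # gets a finite FAR; this PR moves that choice into the significance
    # module instead of leaving it to each calling script
    return (n_louder + 1) / background_time     # FAR in Hz

# Made-up numbers, purely to show how the quantities relate
back_stat = np.array([3.1, 4.0, 5.2, 6.0])
dec_factor = np.ones_like(back_stat)
fore_stat = np.array([4.5, 7.0])
background_time = 1.0e7                         # seconds of background livetime
foreground_time = 3.0e6                         # seconds of foreground livetime

far = sketch_far(back_stat, fore_stat, dec_factor, background_time)
ifar = 1.0 / far                                # seconds
fap = 1.0 - np.exp(-foreground_time * far)      # same form as in the scripts below
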
76 changes: 42 additions & 34 deletions bin/all_sky_search/pycbc_add_statmap
@@ -160,14 +160,15 @@ if not injection_style:
if k not in all_ifos:
pycbc.io.combine_and_copy(f, files, 'background_exc/' + k)

# create dataset of ifo combination strings
fg_coinc_type = np.array([])
for f_in in files:
key = get_ifo_string(f_in).replace(' ','')
combo_repeat = np.array(np.repeat(key.encode('utf8'),
f_in['foreground/stat'].size))
fg_coinc_type = np.concatenate([fg_coinc_type, combo_repeat])

if args.output_coinc_types:
# create dataset of ifo combination strings
fg_coinc_type = np.array([])
for f_in in files:
key = get_ifo_string(f_in).replace(' ','')
combo_repeat = np.array(np.repeat(key.encode('utf8'),
f_in['foreground/fap'].size))
fg_coinc_type = np.concatenate([fg_coinc_type, combo_repeat])
f['foreground/ifo_combination'] = fg_coinc_type

logging.info('Collating triggers into single structure')
@@ -271,6 +272,7 @@ for key in f['foreground'].keys():
for k in f['foreground/%s' % key].keys():
filter_dataset(f, 'foreground/{}/{}'.format(key, k), cidx)

fg_coinc_type = fg_coinc_type[cidx]
n_triggers = f['foreground/stat'].size

logging.info('Calculating event times to determine which types of coinc '
@@ -301,49 +303,47 @@ far = {}
far_exc = {}
bg_time = {}
bg_time_exc = {}
fnlouder = {}
fnlouder_exc = {}

if injection_style:
# if background files are provided, this is being used for injections
# use provided background files to calculate the FARs
for bg_fname in args.background_files:
bg_f = h5py.File(bg_fname, 'r')
ifo_combo_key = bg_f.attrs['ifos'].replace(' ','')
_, fnlouder[ifo_combo_key] = \
significance.get_n_louder(
_, far[ifo_combo_key] = significance.get_far(
bg_f['background/stat'][:],
f['foreground/stat'][:],
bg_f['background/decimation_factor'][:],
bg_f.attrs['background_time'],
**significance_dict[ifo_combo_key])

far[ifo_combo_key] = (fnlouder[ifo_combo_key] + 1) / bg_f.attrs['background_time']
_, fnlouder_exc[ifo_combo_key] = \
significance.get_n_louder(
_, far_exc[ifo_combo_key] = \
significance.get_far(
bg_f['background_exc/stat'][:],
f['foreground/stat'][:],
bg_f['background_exc/decimation_factor'][:],
bg_f.attrs['background_time_exc'],
**significance_dict[ifo_combo_key])
far_exc[ifo_combo_key] = (fnlouder_exc[ifo_combo_key] + 1) / bg_f.attrs['background_time_exc']
else:
# if not injection style input files, then the input files will have the
# background included
for f_in in files:
ifo_combo_key = get_ifo_string(f_in).replace(' ','')
_, fnlouder[ifo_combo_key] = \
significance.get_n_louder(
_, far[ifo_combo_key] = \
significance.get_far(
f_in['background/stat'][:],
f['foreground/stat'][:],
f_in['background/decimation_factor'][:],
f_in.attrs['background_time'],
**significance_dict[ifo_combo_key])
far[ifo_combo_key] = (fnlouder[ifo_combo_key] + 1) / f_in.attrs['background_time']
_, fnlouder_exc[ifo_combo_key] = \
significance.get_n_louder(

_, far_exc[ifo_combo_key] = \
significance.get_far(
f_in['background_exc/stat'][:],
f['foreground/stat'][:],
f_in['background_exc/decimation_factor'][:],
f_in.attrs['background_time_exc'],
**significance_dict[ifo_combo_key])
far_exc[ifo_combo_key] = (fnlouder_exc[ifo_combo_key] + 1) / f_in.attrs['background_time_exc']

logging.info('Combining false alarm rates from all available backgrounds')

@@ -357,8 +357,15 @@ fg_fars = np.array([list(far[ct]) for ct in all_ifo_combos])
fg_fars_exc = np.array([list(far_exc[ct]) for ct in all_ifo_combos])

# Combine the FARs with the mask to obtain the new ifars
fg_ifar = conv.sec_to_year(1. / np.sum(isincombo_mask * fg_fars, axis=0))
fg_ifar_exc = conv.sec_to_year(1. / np.sum(isincombo_mask * fg_fars_exc, axis=0))
fg_fars_out = np.sum(isincombo_mask * fg_fars, axis=0)
fg_fars_exc_out = np.sum(isincombo_mask * fg_fars_exc, axis=0)

# Apply any limits as appropriate
fg_fars_out = significance.apply_far_limit(fg_fars_out, significance_dict, combo=fg_coinc_type)
fg_fars_exc_out = significance.apply_far_limit(fg_fars_exc_out, significance_dict, combo=fg_coinc_type)

fg_ifar = conv.sec_to_year(1. / fg_fars_out)
fg_ifar_exc = conv.sec_to_year(1. / fg_fars_exc_out)
fg_time = f.attrs['foreground_time']
del isincombo_mask, fg_fars, fg_fars_exc, _

@@ -415,7 +422,7 @@ bg_coinc_type = np.array([])
for f_in in files:
key = get_ifo_string(f_in).replace(' ','')
combo_repeat_fg = np.array(np.repeat(key.encode('utf8'),
f_in['foreground/fap'].size))
f_in['foreground/stat'].size))
fg_coinc_type = np.concatenate([fg_coinc_type, combo_repeat_fg])
combo_repeat_bg = np.array(np.repeat(key.encode('utf8'),
f_in['background/stat'].size))
@@ -587,29 +594,30 @@ while True:
test_times = np.array([pycbc.events.mean_if_greater_than_zero(tc)[0]
for tc in zip(*times_tuple)])
for key in all_ifo_combos:
bnlouder, fnlouder = significance.get_n_louder(
sep_bg_data[key].data['stat'],
sep_fg_data[key].data['stat'],
sep_bg_data[key].data['decimation_factor'],
**significance_dict[ifo_combo_key])
# In principle, bg time should be adjusted, but is expected to be a
# negligible correction
fg_time_ct[key] -= args.cluster_window
bg_t_y = conv.sec_to_year(bg_time_ct[key])
fg_t_y = conv.sec_to_year(fg_time_ct[key])
sep_bg_data[key].data['ifar'] = bg_t_y / (bnlouder + 1)
sep_fg_data[key].data['ifar'] = bg_t_y / (fnlouder + 1)
bg_far, fg_far = significance.get_far(
sep_bg_data[key].data['stat'],
sep_fg_data[key].data['stat'],
sep_bg_data[key].data['decimation_factor'],
bg_t_y,
**significance_dict[ifo_combo_key])
sep_bg_data[key].data['ifar'] = 1. / bg_far
sep_fg_data[key].data['ifar'] = 1. / fg_far
sep_fg_data[key].data['fap'] = 1 - \
np.exp(-fg_t_y / sep_fg_data[key].data['ifar'])
np.exp(-fg_t_y * fg_far)

logging.info("Recalculating combined IFARs")
for key in all_ifo_combos:
bnlouder, fnlouder = significance.get_n_louder(
_, far[key] = significance.get_far(
sep_bg_data[key].data['stat'],
combined_fg_data.data['stat'],
sep_bg_data[key].data['decimation_factor'],
bg_time_ct[key],
**significance_dict[ifo_combo_key])
far[key] = (fnlouder + 1) / bg_time_ct[key]
# Set up variable for whether each coincidence is available in each coincidence time
is_in_combo_time[key] = np.zeros(n_triggers)
end_times = np.array(f['segments/%s/end' % key][:])
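Before moving on: the apply_far_limit calls above take the summed FAR array, the per-combination significance settings, and the array of ifo-combination labels (fg_coinc_type, whose computation is therefore moved out of the args.output_coinc_types branch and filtered alongside the other foreground datasets). The following toy stand-in shows the intended effect under the assumption, suggested by the 'extreme IFARs' motivation, that the limit acts as a per-combination floor on the FAR, i.e. a cap on the reported IFAR; the dict layout and function name are illustrative, not the pycbc API.

import numpy as np

def sketch_apply_far_limit(far, far_floor_by_combo, combo):
    # Hypothetical stand-in for significance.apply_far_limit: raise any FAR
    # below its combination's floor up to that floor, so IFAR = 1 / FAR
    # cannot blow up when a candidate is louder than all background
    far = np.array(far, dtype=float)
    combo = np.asarray(combo)
    for key, floor in far_floor_by_combo.items():
        mask = combo == key
        far[mask] = np.maximum(far[mask], floor)
    return far

fars = np.array([1.0e-13, 3.0e-9, 2.0e-7])           # Hz, made-up values
combos = np.array([b'H1L1', b'H1L1', b'H1L1V1'])
floors = {b'H1L1': 1.0e-10, b'H1L1V1': 3.0e-10}      # per-combination FAR floors
print(sketch_apply_far_limit(fars, floors, combos))  # [1e-10, 3e-9, 2e-7]
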
66 changes: 37 additions & 29 deletions bin/all_sky_search/pycbc_coinc_statmap
@@ -241,23 +241,29 @@ fore_stat = all_trigs.stat[fore_locs]

# Cumulative array of inclusive background triggers and the number of
# inclusive background triggers louder than each foreground trigger
back_cnum, fnlouder = significance.get_n_louder(
bg_far, fg_far = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
background_time,
**significance_dict[ifo_combo])

# Cumulative array of exclusive background triggers and the number
# of exclusive background triggers louder than each foreground trigger
back_cnum_exc, fnlouder_exc = significance.get_n_louder(
bg_far_exc, fg_far_exc = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
background_time_exc,
**significance_dict[ifo_combo])

f['background/ifar'] = conv.sec_to_year(background_time / (back_cnum + 1))
f['background_exc/ifar'] = conv.sec_to_year(background_time_exc /
(back_cnum_exc + 1))
fg_far = significance.apply_far_limit(fg_far, significance_dict, combo=ifo_combo)
bg_far = significance.apply_far_limit(bg_far, significance_dict, combo=ifo_combo)
fg_far_exc = significance.apply_far_limit(fg_far_exc, significance_dict, combo=ifo_combo)
bg_far_exc = significance.apply_far_limit(bg_far_exc, significance_dict, combo=ifo_combo)

f['background/ifar'] = conv.sec_to_year(1. / bg_far)
f['background_exc/ifar'] = conv.sec_to_year(1. / bg_far_exc)
f.attrs['background_time'] = background_time
f.attrs['foreground_time'] = coinc_time
f.attrs['background_time_exc'] = background_time_exc
@@ -266,11 +272,11 @@ f.attrs['foreground_time_exc'] = coinc_time_exc
logging.info("calculating ifar/fap values")

if fore_locs.sum() > 0:
ifar = background_time / (fnlouder + 1)
ifar = 1. / fg_far
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground/ifar'] = conv.sec_to_year(ifar)
f['foreground/fap'] = fap
ifar_exc = background_time_exc / (fnlouder_exc + 1)
ifar_exc = 1. / fg_far_exc
fap_exc = 1 - numpy.exp(- coinc_time_exc / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
Expand All @@ -295,28 +301,27 @@ h_iterations = 0

orig_fore_stat = fore_stat

# Assign a new variable to keep track of whether we care about fnlouder or
# fnlouder_exc. Whether we want to remove hierarchically against inclusive
# Assign a new variable to keep track of whether we care about ifar or
# ifar_exc. Whether we want to remove hierarchically against inclusive
# or exclusive background.
if args.max_hierarchical_removal != 0:
# If user wants to remove against inclusive background.
if args.hierarchical_removal_against == 'inclusive':
louder_foreground = fnlouder
ifar_foreground = ifar
# Otherwise user wants to remove against exclusive background
else :
louder_foreground = fnlouder_exc
ifar_foreground = ifar_exc
else :
# It doesn't matter if you choose louder_foreground = fnlouder
# or vice versa, the while loop below will break if
# louder_foreground doesn't contain 0, or at the comparison
# h_iterations == args.max_hierarchical_removal. But this avoids
# a NameError
louder_foreground = fnlouder
# It doesn't matter if you choose ifar_foreground = ifar
# or ifar_exc, the while loop below will break
# straight away. But this avoids a NameError
ifar_foreground = ifar

# Step 2 : Loop until we don't have to hierarchically remove anymore. This
# will happen when fnlouder has no elements that equal 0.
# will happen when ifar_foreground has no elements greater than
# the background time.

while numpy.any(louder_foreground == 0):
while numpy.any(ifar_foreground >= background_time):
# If the user wants to stop doing hierarchical removals after a set
# number of iterations then break when that happens.
if (h_iterations == args.max_hierarchical_removal):
@@ -326,7 +331,7 @@ while numpy.any(louder_foreground == 0):
# downstream codes.
if h_iterations == 0:
f['background_h%s/stat' % h_iterations] = back_stat
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(background_time / (back_cnum + 1))
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(1. / bg_far)
f['background_h%s/timeslide_id' % h_iterations] = all_trigs.data['timeslide_id'][back_locs]
f['foreground_h%s/stat' % h_iterations] = fore_stat
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
@@ -408,38 +413,41 @@ while numpy.any(louder_foreground == 0):
logging.info("Calculating FAN from background statistic values")
back_stat = all_trigs.stat[back_locs]
fore_stat = all_trigs.stat[fore_locs]
back_cnum, fnlouder = significance.get_n_louder(
bg_far, fg_far = significance.get_far(
back_stat,
fore_stat,
all_trigs.decimation_factor[back_locs],
background_time,
return_counts=True,
**significance_dict[ifo_combo])

# Update the louder_foreground criteria depending on whether foreground
# Update the ifar_foreground criteria depending on whether foreground
# triggers are being removed via inclusive or exclusive background.
if args.hierarchical_removal_against == 'inclusive':
louder_foreground = fnlouder
ifar_foreground = 1. / fg_far

# Exclusive background doesn't change when removing foreground triggers.
# So we don't have to take back_cnum_exc, just repopulate fnlouder_exc
# So we don't have to take background ifar, just repopulate ifar_foreground
else :
_, fnlouder_exc = significance.get_n_louder(
_, fg_far_exc = significance.get_far(
exc_zero_trigs.stat,
fore_stat,
exc_zero_trigs.decimation_factor,
background_time_exc,
**significance_dict[ifo_combo])
louder_foreground = fnlouder_exc
# louder_foreground has been updated and the code can continue.
ifar_foreground = 1. / fg_far_exc
# ifar_foreground has been updated and the code can continue.

logging.info("Calculating ifar/fap values")
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(background_time / (back_cnum + 1))
f['background_h%s/ifar' % h_iterations] = conv.sec_to_year(1. / bg_far)
f.attrs['background_time_h%s' % h_iterations] = background_time
f.attrs['foreground_time_h%s' % h_iterations] = coinc_time

if fore_locs.sum() > 0:
# Write ranking statistic to file just for downstream plotting code
f['foreground_h%s/stat' % h_iterations] = fore_stat

ifar = background_time / (fnlouder + 1)
ifar = 1. / fg_far
fap = 1 - numpy.exp(- coinc_time / ifar)
f['foreground_h%s/ifar' % h_iterations] = conv.sec_to_year(ifar)
f['foreground_h%s/fap' % h_iterations] = fap
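The hierarchical-removal loop in this file now compares foreground IFARs against the background time instead of looking for triggers with no louder background. With the conservative FAR of (n_louder + 1) / background_time the two criteria select the same triggers; a quick numpy check (illustrative only, not part of the codebase):

import numpy as np

background_time = 1.0e7                  # seconds, made-up value
n_louder = np.array([0, 0, 3, 17])       # louder-background counts
ifar = background_time / (n_louder + 1)  # old-style conservative IFAR, seconds

# ifar >= background_time  <=>  n_louder + 1 <= 1  <=>  n_louder == 0
assert np.array_equal(ifar >= background_time, n_louder == 0)
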
7 changes: 5 additions & 2 deletions bin/all_sky_search/pycbc_coinc_statmap_inj
@@ -83,13 +83,16 @@ f.attrs['foreground_time'] = coinc_time

if len(zdata) > 0:

_, fnlouder_exc = significance.get_n_louder(
_, fg_far_exc = significance.get_far(
back_stat,
zdata.stat,
dec_fac,
background_time,
**significance_dict[ifo_key])

ifar_exc = background_time / (fnlouder_exc + 1)
fg_far_exc = significance.apply_far_limit(fg_far_exc, significance_dict, combo=ifo_key)

ifar_exc = 1. / fg_far_exc
fap_exc = 1 - numpy.exp(- coinc_time / ifar_exc)
f['foreground/ifar_exc'] = conv.sec_to_year(ifar_exc)
f['foreground/fap_exc'] = fap_exc
21 changes: 12 additions & 9 deletions bin/all_sky_search/pycbc_exclude_zerolag
@@ -94,21 +94,24 @@ for k in filtered_trigs.data:
f_out['background_exc/%s' % k] = filtered_trigs.data[k]

logging.info('Recalculating IFARs')
bnlouder, fnlouder = significance.get_n_louder(
bg_far, fg_far = significance.get_far(
filtered_trigs.data['stat'],
f_in['foreground/stat'][:],
filtered_trigs.data['decimation_factor'],
f_in.attrs['background_time_exc'],
**significance_dict[all_ifo_key])
# In principle exc background time should be recalculated
# However we expect exclude zerolag to be a (very) small correction
fg_time_exc = conv.sec_to_year(f_in.attrs['foreground_time_exc'])
bg_time_exc = conv.sec_to_year(f_in.attrs['background_time_exc'])
fg_ifar_exc = bg_time_exc / (fnlouder + 1)
bg_ifar_exc = bg_time_exc / (bnlouder + 1)

fg_far = significance.apply_far_limit(fg_far, significance_dict, combo=all_ifo_key)
bg_far = significance.apply_far_limit(bg_far, significance_dict, combo=all_ifo_key)

fg_ifar_exc = 1. / fg_far
bg_ifar_exc = 1. / bg_far

logging.info('Writing updated ifars to file')
f_out['foreground/ifar_exc'][:] = fg_ifar_exc
f_out['foreground/ifar_exc'][:] = conv.sec_to_year(fg_ifar_exc)
f_out['background_exc/ifar'][:] = conv.sec_to_year(bg_ifar_exc)

fg_time_exc = conv.sec_to_year(f_in.attrs['foreground_time_exc'])
f_out['foreground/fap_exc'][:] = 1 - np.exp(-fg_time_exc / fg_ifar_exc)
f_out['background_exc/ifar'][:] = bg_ifar_exc

logging.info("Done!")
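
One of the squashed commits above ('Applying limits with incorrect units applies them incorrectly') shows up in these hunks: FARs and the IFARs derived from them are handled in seconds and only converted with conv.sec_to_year when written to file, so any configured limit has to be expressed in the same units. A small reminder of that conversion, with illustrative numbers (conv here stands for pycbc.conversions, which provides sec_to_year):

from pycbc import conversions as conv

ifar_seconds = 3.15e9                      # an IFAR of roughly 100 years, in seconds
ifar_years = conv.sec_to_year(ifar_seconds)
far_per_year = 1.0 / ifar_years            # limits must be applied in matching units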