Skip to content

Commit

Permalink
add discretized TXbb control plots (#260)
Browse files Browse the repository at this point in the history
* add discretized TXbb control plots

* simplify correction and bins loading; add blind plots for bdt

* style: pre-commit fixes

---------

Co-authored-by: cmantill <[email protected]>
Co-authored-by: cmantill <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
4 people authored Feb 22, 2025
1 parent c9d7be2 commit 91a83cc
Show file tree
Hide file tree
Showing 5 changed files with 196 additions and 98 deletions.
21 changes: 6 additions & 15 deletions src/HH4b/boosted/bdt_trainings_run3/v13_glopartv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import pandas as pd
import vector

from HH4b.utils import discretize_var

"""
This config is based on v10_glopartv2.py, but with the following changes:
Discretized the TXbb variable into 6 integer categories
Expand Down Expand Up @@ -95,7 +97,10 @@ def bdt_dataframe(events, key_map=lambda x: x):
key_map("H2Pt"): h2.pt,
key_map("H1eta"): h1.eta,
# xbb
key_map("H1Xbb"): disc_TXbb(events[key_map("bbFatJetParTTXbb")].to_numpy()[:, 0]),
key_map("H1Xbb"): discretize_var(
events[key_map("bbFatJetParTTXbb")].to_numpy()[:, 0],
bins=[0, 0.8, 0.9, 0.94, 0.97, 0.99, 1],
),
# ratios
key_map("H1Pt_HHmass"): h1.pt / hh.mass,
key_map("H2Pt_HHmass"): h2.pt / hh.mass,
Expand All @@ -112,17 +117,3 @@ def bdt_dataframe(events, key_map=lambda x: x):
)

return df_events


def disc_TXbb(txbb_array, bins=None):
    """Discretize TXbb scores into integer categories.

    Args:
        txbb_array: array-like of scores to discretize.
        bins: optional increasing sequence of bin edges. When omitted, the
            edges ``[0, 0.8, 0.9, 0.94, 0.97, 0.99, 1]`` are used.

    Returns:
        Integer array of category indices in ``1 .. len(bins) - 1``.
        Scores below the first edge land in category 1, and scores at or
        above the last edge land in the top category.
    """
    if bins is None:
        bins = [0, 0.8, 0.9, 0.94, 0.97, 0.99, 1]

    # np.digitize yields 0 for values under the first edge and len(bins)
    # for values at/over the last edge; the clip folds both overflow
    # indices into the first and last real categories.
    bin_indices = np.digitize(txbb_array, bins)
    return np.clip(bin_indices, 1, len(bins) - 1)
34 changes: 31 additions & 3 deletions src/HH4b/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ def ratioHistPlot(
energy: str = "13.6",
add_pull: bool = False,
reweight_qcd: bool = False,
qcd_norm: float = None,
save_pdf: bool = True,
):
"""
Expand Down Expand Up @@ -389,6 +390,8 @@ def ratioHistPlot(
plot_significance (bool): plot Asimov significance below ratio plot
significance_dir (str): "Direction" for significance. i.e. a > cut ("right"), a < cut ("left"), or per-bin ("bin").
axrax (Tuple): optionally input ax and rax instead of creating new ones
reweight_qcd (bool): reweight qcd process to agree with data-othermc
qcd_norm (float): normalization to reweight qcd process, if not None
"""

# copy hists and bg_keys so input objects are not changed
Expand Down Expand Up @@ -453,11 +456,16 @@ def ratioHistPlot(

# re-weight qcd
kfactor = {sample: 1 for sample in bg_keys}
if reweight_qcd:
if reweight_qcd and qcd_norm is None:
bg_yield = np.sum(sum([hists[sample, :] for sample in bg_keys]).values())
data_yield = np.sum(hists[data_key, :].values())
if bg_yield > 0:
kfactor["qcd"] = data_yield / bg_yield
print("kfactor ", kfactor["qcd"], qcd_norm)
elif reweight_qcd:
kfactor["qcd"] = qcd_norm
else:
kfactor["qcd"] = 1.0

# background samples
if len(bg_keys) > 0:
Expand Down Expand Up @@ -571,13 +579,18 @@ def get_variances(bg_hist):

# print(hists.axes[1].widths)

bg_err_tot_mcstat = None
if bg_err_mcstat:
bg_err_label = (
"Stat. MC Uncertainty (excl. Multijet)"
if exclude_qcd_mcstat
else "Stat. MC Uncertainty"
)

bg_tot = sum([hists[sample, :] for sample in bg_keys])
bg_err_tot_mcstat = np.sqrt(bg_tot.variances())
# print("mcstat ",bg_err_tot_mcstat)

plot_shaded = False

mcstat_up = {}
Expand Down Expand Up @@ -607,8 +620,9 @@ def get_variances(bg_hist):
yerr=yerr,
histtype="errorbar",
markersize=0,
color="gray",
color="black",
label=bg_err_label,
xerr=True,
)
else:
hep.histplot(
Expand All @@ -617,7 +631,8 @@ def get_variances(bg_hist):
yerr=yerr,
histtype="errorbar",
markersize=0,
color="gray",
color="black",
xerr=True,
)

if plot_shaded:
Expand Down Expand Up @@ -707,6 +722,7 @@ def get_variances(bg_hist):
histtype="errorbar",
markersize=20,
color="black",
xerr=True,
capsize=0,
)
rax.set_xlabel(hists.axes[1].label)
Expand All @@ -723,6 +739,16 @@ def get_variances(bg_hist):
hatch="//",
linewidth=0,
)
if bg_err_tot_mcstat is not None:
ax.fill_between(
np.repeat(hists.axes[1].edges, 2)[1:-1],
np.repeat((bg_err_tot_mcstat) / tot_val, 2),
np.repeat((bg_err_tot_mcstat) / tot_val, 2),
color="black",
alpha=0.1,
hatch="//",
linewidth=0,
)
else:
rax.set_xlabel(hists.axes[1].label)

Expand Down Expand Up @@ -843,6 +869,8 @@ def get_variances(bg_hist):
else:
plt.close()

return kfactor.get("qcd", 1.0)


def subtractedHistPlot(
hists: Hist,
Expand Down
Loading

0 comments on commit 91a83cc

Please sign in to comment.