Skip to content

Commit

Permalink
clean up FCChh bbyy example, add example for checking btag efficienci…
Browse files Browse the repository at this point in the history
…es from delphes
  • Loading branch information
bistapf committed Oct 16, 2024
1 parent 2db3f23 commit 0609fd9
Show file tree
Hide file tree
Showing 5 changed files with 390 additions and 61 deletions.
19 changes: 9 additions & 10 deletions examples/FCChh/ggHH_bbyy/analysis_final.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@
outputDir = "outputs/FCChh/ggHH_bbyy/final/"

processList = {
# 'pwp8_pp_hh_5f_hhbbyy':{},#Run over the full statistics from stage2 input file <inputDir>/p8_ee_ZZ_ecm240.root. Keep the same output name as input
'pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags':{},#Run over the full statistics from stage2 input file <inputDir>/p8_ee_ZZ_ecm240.root. Keep the same output name as input
'pwp8_pp_hh_5f_hhbbyy':{}, #output file from analysis_stage1.py
}

#Link to the dictionary that contains all the cross section information etc...
procDict = "FCCee_procDict_spring2021_IDEA.json" # will need an FCC-hh one!
procDict = "/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/FCChh_procDict_tutorial.json"
#Note the numberOfEvents and sumOfWeights are placeholders that get overwritten with the correct values in the samples

#Add MySample_p8_ee_ZH_ecm240 as it is not an offical process TO UPDATE
# procDictAdd={"pwp8_pp_hh_5f_hhbbyy":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 1.0, "kfactor": 1.0, "matchingEfficiency": 1.0}}
# procDictAdd={"pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 1.0, "kfactor": 1.0, "matchingEfficiency": 1.0}}
procDictAdd={"pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags": {"numberOfEvents": 4980000, "sumOfWeights": 4980000.0, "crossSection": 0.0029844128399999998, "kfactor": 1.075363, "matchingEfficiency": 1.0}}
#How to add a process that is not in the official dictionary:
# procDictAdd={"pwp8_pp_hh_5f_hhbbyy": {"numberOfEvents": 4980000, "sumOfWeights": 4980000.0, "crossSection": 0.0029844128399999998, "kfactor": 1.075363, "matchingEfficiency": 1.0}}

# Expected integrated luminosity
intLumi = 30e+06 # pb-1
Expand All @@ -34,17 +32,18 @@
# Optional: Use weighted events
do_weighted = True

###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file
# Dictionary of the list of cuts. The key is the name of the selection that will be added to the output file
cutList = {
"sel0_myy":"m_yy[0] > 100. && m_yy[0] < 180.",
"sel1_mbb":"(m_yy[0] > 100. && m_yy[0] < 180.) && (m_bb[0] > 80. && m_bb[0] < 200.)",
}


#Dictionary for the ouput variable/hitograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value.
# Dictionary for the output variable/histograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value.
histoList = {
"myy":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":0,"xmax":200},
"myy_zoom":{"name":"m_yy","title":"m_{#gamma#gamma} [GeV]","bin":50,"xmin":100,"xmax":180},
"mbb":{"name":"m_bb","title":"m_{bb} [GeV]","bin":50,"xmin":0,"xmax":250},
"mbb_zoom":{"name":"m_bb","title":"m_{b} [GeV]","bin":50,"xmin":80,"xmax":200},
"y1_pT":{"name":"g1_pt","title":"pT_{#gamma1} [GeV]","bin":50,"xmin":0.,"xmax":200.},
"y2_pT":{"name":"g2_pt","title":"pT_{#gamma2} [GeV]","bin":50,"xmin":0.,"xmax":200.},
"pT_y1_vs_y2_2D":{"cols":["g1_pt", "g2_pt"],"title":"m_{Z} - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140)]}, # 2D histogram
Expand Down
156 changes: 156 additions & 0 deletions examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
'''
Analysis example for FCC-hh, using gg->HH->bbyy di-Higgs production events to check the Delphes b-tagging efficiencies
'''
from argparse import ArgumentParser

# Mandatory: Analysis class where the user defines the operations on the
# dataframe.
class Analysis():
    '''
    Validation of Delphes b-tagging efficiencies in HH->bbyy events.

    For each b-tagging working point the b-tagged jets are selected, and both
    the full jet collection and the b-tagged collections are matched to the
    generator-level objects returned by AnalysisFCChh::getBhadron. The
    multiplicity, pT and eta of the matched jets are written out.
    '''

    # (label, tag bit) for each b-tagging working point. The bit is the last
    # argument of AnalysisFCChh::get_tagged_jets; bit 0 = loose, bit 1 = medium,
    # bit 2 = tight WP, see:
    # https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
    _BTAG_WORKING_POINTS = (('loose', 0), ('medium', 1), ('tight', 2))

    # (column prefix, FCCAnalyses::ReconstructedParticle getter) defined for
    # every collection of b-tagged jets.
    _TAGGED_JET_COLUMNS = (
        ('n', 'get_n'),
        ('px', 'get_px'),
        ('py', 'get_py'),
        ('pz', 'get_pz'),
        ('E', 'get_e'),
        ('pT', 'get_pt'),
        ('eta', 'get_eta'),
    )

    # (column prefix, getter) defined for every gen-matched jet collection;
    # these are the columns written to the output file.
    _MATCHED_JET_COLUMNS = (
        ('n', 'get_n'),
        ('pT', 'get_pt'),
        ('eta', 'get_eta'),
    )

    def __init__(self, cmdline_args):
        '''
        Configure the analysis.

        cmdline_args: dictionary of the command line arguments known to
        fccanalysis; the arguments it did not recognise are read from the
        'unknown' key and parsed here.
        '''
        parser = ArgumentParser(
            description='Additional analysis arguments',
            usage='Provide additional arguments after analysis script path')
        # parser.add_argument('--bjet-pt', default='10.', type=float,
        #                     help='Minimal pT of the selected b-jets.')
        # Parse additional arguments not known to the FCCAnalyses parsers.
        # All command line arguments known to fccanalysis are provided in the
        # `cmdline_args` dictionary.
        self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown'])

        # Mandatory: List of processes to run over
        self.process_list = {
            # Add your processes like this:
            # '<name of process>':{'fraction':<fraction of events to run over>, 'chunks':<number of chunks to split the output into>, 'output':<name of the output file> },
            # - <name of process> needs to correspond either the name of the input .root file, or the name of a directory containing root files
            # If you want to process only part of the events, split the output into chunks or give a different name to the output use the optional arguments
            # or leave blank to use defaults = run the full statistics in one output file named the same as the process:
            'pwp8_pp_hh_5f_hhbbyy': {},
        }

        # Mandatory: Input directory where to find the samples, or a production tag when running over the centrally produced
        # samples (this points to the yaml files for getting sample statistics)
        self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/'

        # Optional: output directory, default is local running directory
        self.output_dir = 'outputs/FCChh/ggHH_bbyy/nosel/'

        # Optional: analysisName, default is ''
        # self.analysis_name = 'My Analysis'

        # Optional: number of threads to run on, default is 'all available'
        # self.n_threads = 4

        # Optional: running on HTCondor, default is False
        # self.run_batch = False

        # Optional: Use weighted events
        self.do_weighted = True

        # Optional: test file that is used if you run with the --test argument (fccanalysis run ./examples/FCChh/ggHH_bbyy/analysis_plot_tag_eff.py --test)
        self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \
                         'tutorials/edm4hep_tutorial_data/' \
                         'pwp8_pp_hh_5f_hhbbyy.root'

    def _matched_collections(self):
        '''
        Return (matched collection, source jet collection) pairs: all jets
        first, then the b-tagged jets of each working point.
        '''
        pairs = [('jets_genmatched_b', 'Jet')]
        for wp_name, _ in self._BTAG_WORKING_POINTS:
            pairs.append(('bjets_{}_genmatched_b'.format(wp_name),
                          'b_tagged_jets_{}'.format(wp_name)))
        return pairs

    # Mandatory: analyzers function to define the analysis graph, please make
    # sure you return the dataframe, in this example it is dframe2
    def analyzers(self, dframe):
        '''
        Analysis graph.

        dframe: dataframe exposing Define(name, expression) and
        Alias(name, column); every call returns the dataframe to continue from.
        Returns the dataframe with all columns of this analysis defined.
        '''
        getter_ns = 'FCCAnalyses::ReconstructedParticle::'

        dframe2 = dframe.Define("weight", "EventHeader.weight")

        ####################### JETS #######################
        # One collection of b-tagged jets per working point, followed by its
        # multiplicity and kinematics.
        for wp_name, wp_bit in self._BTAG_WORKING_POINTS:
            tagged = "b_tagged_jets_{}".format(wp_name)
            dframe2 = dframe2.Define(
                tagged,
                "AnalysisFCChh::get_tagged_jets(Jet, Jet_HF_tags, "
                "_Jet_HF_tags_particle, _Jet_HF_tags_parameters, {})".format(wp_bit))
            for prefix, getter in self._TAGGED_JET_COLUMNS:
                dframe2 = dframe2.Define(
                    "{}_b_jets_{}".format(prefix, wp_name),
                    "{}{}({})".format(getter_ns, getter, tagged))

        ####################### MC PARTICLES #######################
        # all MC particles
        dframe2 = (
            dframe2
            .Define("mc_particles", "Particle")
            .Alias("mc_parents", "_Particle_parents.index")
            .Alias("mc_daughters", "_Particle_daughters.index")
        )

        ############ Gen matched b-jets for b-tag eff study ############
        dframe2 = dframe2.Define(
            "MC_b", "AnalysisFCChh::getBhadron(mc_particles,mc_parents)")

        matched = self._matched_collections()

        # Match every jet collection to MC_b.
        # NOTE(review): 0.4 is presumably the matching cone size — confirm
        # against AnalysisFCChh::find_reco_matches.
        for matched_name, source in matched:
            dframe2 = dframe2.Define(
                matched_name,
                "AnalysisFCChh::find_reco_matches(MC_b, {}, 0.4)".format(source))

        # Multiplicity, pT and eta of each matched collection.
        for matched_name, _ in matched:
            for prefix, getter in self._MATCHED_JET_COLUMNS:
                dframe2 = dframe2.Define(
                    "{}_{}".format(prefix, matched_name),
                    "{}{}({})".format(getter_ns, getter, matched_name))

        return dframe2

    # Mandatory: output function, please make sure you return the branch list
    # as a python list
    def output(self):
        '''
        Output variables which will be saved to output root file.
        '''
        branch_list = ['weight']

        # number of b-tagged jets per working point
        for wp_name, _ in self._BTAG_WORKING_POINTS:
            branch_list.append('n_b_jets_{}'.format(wp_name))

        # gen-matched b-jets to check efficiencies
        for matched_name, _ in self._matched_collections():
            for prefix, _ in self._MATCHED_JET_COLUMNS:
                branch_list.append('{}_{}'.format(prefix, matched_name))

        return branch_list
12 changes: 4 additions & 8 deletions examples/FCChh/ggHH_bbyy/analysis_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
outdir = 'outputs/FCChh/ggHH_bbyy/plots/'
plotStatUnc = True

variables = ['myy','myy_zoom','y1_pT','y2_pT']
# variables = ['myy','myy_zoom','y1_pT','y2_pT','pT_y1_vs_y2_2D']
variables = ['myy','myy_zoom', 'mbb', 'mbb_zoom', 'y1_pT','y2_pT']

# rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional)

### Dictionary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same as in the final selection
Expand All @@ -28,14 +28,10 @@

colors = {}
colors['bbyy_signal'] = ROOT.kRed
# colors['WW'] = ROOT.kBlue+1
# colors['ZZ'] = ROOT.kGreen+2
# colors['VV'] = ROOT.kGreen+3

plots = {}
plots['bbyy_analysis'] = {'signal':{'bbyy_signal':['pwp8_pp_hh_5f_hhbbyy_split_HF_tau_tags']},
plots['bbyy_analysis'] = {'signal':{'bbyy_signal':['pwp8_pp_hh_5f_hhbbyy']},
}


legend = {}
legend['bbyy_signal'] = 'HH signal'
legend['bbyy_signal'] = 'HH'
Loading

0 comments on commit 0609fd9

Please sign in to comment.