Merge pull request #5 from pmastrap/FCChh_HH_analyses
Fc chh hh analyses
bistapf authored Aug 11, 2023
2 parents c63f1b8 + 62ddbed commit d7aa452
Showing 12 changed files with 15,659 additions and 37 deletions.
139 changes: 139 additions & 0 deletions run/bbyy_analysis/Combine/card_maker_allCats.py
@@ -0,0 +1,139 @@
#!/usr/bin/env python
# Author: C.Caputo (UCLouvain)

import CombineHarvester.CombineTools.ch as ch

import ROOT as R
import glob
import os

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--reg", type=str, required=True, choices=['sb', 'c'], help="region in mbb" )
parser.add_argument("--tag", type=str, required=True, help="Datacard name tag" )
parser.add_argument("--input", type=str, required=True, help="Root files input path" )
parser.add_argument("--scen", type=str, required=True, choices=['I', 'II', 'III'],help="Scenario (I, II, III)")
args = parser.parse_args()

cb = ch.CombineHarvester()

reg = args.reg
tag = args.tag
input_path = args.input  # renamed from 'input' to avoid shadowing the built-in
scen = args.scen

#### Systematic uncertainties for the three scenarios ####
systScenI = {"lumi": 0.005,
             "btag": 0.005,
             "sigxs": 0.005,
             "phid": 0.005}
systScenII = {"lumi": 0.01,
              "btag": 0.01,
              "sigxs": 0.01,
              "phid": 0.01}
systScenIII = {"lumi": 0.02,
               "btag": 0.02,
               "sigxs": 0.02,
               "phid": 0.02}

syst_dict = {"I": systScenI,
             "II": systScenII,
             "III": systScenIII}

auxiliaries = os.environ['CMSSW_BASE'] + '/src/HiggsAnalysis/CombinedLimit/HH_FCChh_newCard_2023'
aux_shapes = auxiliaries + '/{inputFolder}'.format(inputFolder=input_path)

print(auxiliaries)
print(aux_shapes)


chns = ['great350_high_purity', 'great350_medium_purity', 'small350_high_purity', 'small350_medium_purity']

print(chns)


bkg_procs = {
    'great350_high_purity': ['mgg_mgp8_pp_tth01j_5f_haa', 'mgg_mgp8_pp_h012j_5f_haa', 'mgg_mgp8_pp_jjaa_5f', 'mgg_mgp8_pp_jja_5f', 'mgg_mgp8_pp_vh012j_5f_haa', 'mgg_mgp8_pp_vbf_h01j_5f_haa'],
    'great350_medium_purity': ['mgg_mgp8_pp_tth01j_5f_haa', 'mgg_mgp8_pp_h012j_5f_haa', 'mgg_mgp8_pp_jjaa_5f', 'mgg_mgp8_pp_jja_5f', 'mgg_mgp8_pp_vh012j_5f_haa', 'mgg_mgp8_pp_vbf_h01j_5f_haa'],
    'small350_high_purity': ['mgg_mgp8_pp_tth01j_5f_haa', 'mgg_mgp8_pp_h012j_5f_haa', 'mgg_mgp8_pp_jjaa_5f', 'mgg_mgp8_pp_jja_5f', 'mgg_mgp8_pp_vh012j_5f_haa', 'mgg_mgp8_pp_vbf_h01j_5f_haa'],
    'small350_medium_purity': ['mgg_mgp8_pp_tth01j_5f_haa', 'mgg_mgp8_pp_h012j_5f_haa', 'mgg_mgp8_pp_jjaa_5f', 'mgg_mgp8_pp_jja_5f', 'mgg_mgp8_pp_vh012j_5f_haa', 'mgg_mgp8_pp_vbf_h01j_5f_haa']
}

sig_procs = ['mgg_pwp8_pp_hh_lambda100_5f_hhbbaa', 'mgg_pwp8_pp_hh_lambda240_5f_hhbbaa', 'mgg_pwp8_pp_hh_lambda300_5f_hhbbaa']



cats = {
    'great350_medium_purity': [
        (0, 'great350_medium_purity'),
    ],
    'great350_high_purity': [
        (0, 'great350_high_purity'),
    ],
    'small350_medium_purity': [
        (0, 'small350_medium_purity'),
    ],
    'small350_high_purity': [
        (0, 'small350_high_purity'),
    ]
}

print(cats)
print('>> Creating processes and observations...')


for chn in chns:
    cb.AddObservations(['*'], ['HHbbgg'], ['100TeV'], [chn], cats[chn])
    cb.AddProcesses(['*'], ['HHbbgg'], ['100TeV'], [chn], bkg_procs[chn], cats[chn], False)
    cb.AddProcesses([''], ['HHbbgg'], ['100TeV'], [chn], sig_procs, cats[chn], True)  # mod 125



print('>> Adding systematic uncertainties...')
signal = cb.cp().signals().process_set()

MC_Backgrounds = ['mgg_mgp8_pp_tth01j_5f_haa', 'mgg_mgp8_pp_h012j_5f_haa', 'mgg_mgp8_pp_jjaa_5f', 'mgg_mgp8_pp_jja_5f', 'mgg_mgp8_pp_vh012j_5f_haa', 'mgg_mgp8_pp_vbf_h01j_5f_haa']

# Asymmetric lnN uncertainties, sized according to the chosen scenario
cb.cp().process(signal + MC_Backgrounds).AddSyst(cb, "lumi", "lnN", ch.SystMap()([1. + syst_dict[scen]["lumi"], 1. - syst_dict[scen]["lumi"]]))
cb.cp().process(signal + MC_Backgrounds).AddSyst(cb, "phid", "lnN", ch.SystMap()([1. + syst_dict[scen]["phid"], 1. - syst_dict[scen]["phid"]]))
cb.cp().process(signal + MC_Backgrounds).AddSyst(cb, "btag", "lnN", ch.SystMap()([1. + syst_dict[scen]["btag"], 1. - syst_dict[scen]["btag"]]))
cb.cp().process(signal).AddSyst(cb, "sigxs", "lnN", ch.SystMap()([1. + syst_dict[scen]["sigxs"], 1. - syst_dict[scen]["sigxs"]]))



print('>> Extracting histograms from input root files...')
for chn in chns:
    shape_file = aux_shapes + "/" + chn + "_" + reg + ".root"  # renamed from 'file' to avoid shadowing the built-in
    cb.cp().channel([chn]).era(['100TeV']).backgrounds().ExtractShapes(
        shape_file, '$PROCESS', '$PROCESS_$SYSTEMATIC')
    cb.cp().channel([chn]).era(['100TeV']).signals().ExtractShapes(
        shape_file, '$PROCESS$MASS', '$PROCESS$MASS_$SYSTEMATIC')

print('>> Setting standardised bin names...')
ch.SetStandardBinNames(cb)

writer = ch.CardWriter('LIMITS/$TAG/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt',
'LIMITS/$TAG/$ANALYSIS_$CHANNEL.input.root')

print(writer)

writer.SetVerbosity(2)
writer.WriteCards('{}/{}/{}'.format(tag,scen,reg), cb) ## the first argument is the $TAG
# for chn in chns: writer.WriteCards(chn,cb.cp().channel([chn]))
#HHbbtautau_tau_0_13TeV
#for chn in chns:
# with open("./LIMITS/"+tag+"/HHbbtautau_"+chn+"_0_14TeV.txt",'a') as f:
# f.write("* autoMCStats 0 1 1")

print('>> Done!')
#for chn in chns:
##if()
# #with open("./LIMITS/"+tag+"/"+reg+"/"+str(chns[0])+"_"+str(chns[0])+"_0_13TeV.txt",'a') as f:
# print(chn)
# with open('LIMITS/'+tag+'/'+reg+'/HHbbgg_'+chn+'_0_100TeV.txt', 'a') as f:
# f.write("Signal_rateParam rateParam HHbbgg_"+chn+"_0_100TeV mgg_pwp8_pp_hh_lambda100_5f_hhbbaa 15.000\n")
# f.write("Signal_rateParam rateParam HHbbgg_"+chn+"_0_100TeV mgg_pwp8_pp_hh_lambda240_5f_hhbbaa 15.000\n")
# f.write("Signal_rateParam rateParam HHbbgg_"+chn+"_0_100TeV mgg_pwp8_pp_hh_lambda300_5f_hhbbaa 15.000\n")
# f.write(" ")
print('>> Done!')
47 changes: 47 additions & 0 deletions run/bbyy_analysis/Combine/run_cards_allCats.sh
@@ -0,0 +1,47 @@
#!/bin/bash
#bbtautau or bbgg or bbbb or ALL
channel=$1
#name of the dir to put limits
folder=$2

echo "$channel"
if [ "$channel" = "bbgg" ] || [ "$channel" = "ALL" ]
then

    SCENARIOS=('I' 'II' 'III')

    for scen in "${SCENARIOS[@]}"
    do
        python card_maker_allCats.py --input input_files --tag "$folder" --reg sb --scen "$scen"
        python card_maker_allCats.py --input input_files --tag "$folder" --reg c --scen "$scen"
        cd LIMITS/"$folder"/"$scen"
        # Combine the sideband (sb) and central (c) cards per category
        combineCards.py ./sb/HHbbgg_small350_medium_purity_0_100TeV.txt ./c/HHbbgg_small350_medium_purity_0_100TeV.txt > small350_medium_purity.txt
        combineCards.py ./sb/HHbbgg_small350_high_purity_0_100TeV.txt ./c/HHbbgg_small350_high_purity_0_100TeV.txt > small350_high_purity.txt
        combineCards.py small350_medium_purity.txt small350_high_purity.txt > small350_combine.txt

        combineCards.py ./sb/HHbbgg_great350_medium_purity_0_100TeV.txt ./c/HHbbgg_great350_medium_purity_0_100TeV.txt > great350_medium_purity.txt
        combineCards.py ./sb/HHbbgg_great350_high_purity_0_100TeV.txt ./c/HHbbgg_great350_high_purity_0_100TeV.txt > great350_high_purity.txt
        combineCards.py great350_medium_purity.txt great350_high_purity.txt > great350_combine.txt

        combineCards.py great350_combine.txt small350_combine.txt > combine_all.txt
        text2workspace.py combine_all.txt -P HiggsAnalysis.CombinedLimit.HHModel:HHdefault --mass=125

        # Statistics-only kl scan (--fastScan does not re-fit nuisances at each point)
        combine -M MultiDimFit combine_all.root --verbose 1 --algo grid --points=200 -P kl --floatOtherPOIs=0 --setParameterRanges kl=0.8,1.5 --cl=0.68 --setParameters r=1,r_gghh=1,r_qqhh=1,CV=1,C2V=3,kt=1 --robustFit 1 -t -1 --fastScan -n fastScan
        #plot1DScan.py higgsCombinefastScan.MultiDimFit.mH120.root --POI kl --output fastscan

        # Full kl scan with systematics floating
        combine -M MultiDimFit combine_all.root --verbose 1 --algo grid --points=200 -P kl --floatOtherPOIs=0 --setParameterRanges kl=0.8,1.5 --cl=0.68 --setParameters r=1,r_gghh=1,r_qqhh=1,CV=1,C2V=3,kt=1 --robustFit 1 -t -1
        #plot1DScan.py higgsCombineTest.MultiDimFit.mH120.root --POI kl --output scan
        cd -
    done
    cd LIMITS/"$folder"/
    plot1DScan.py ./I/higgsCombinefastScan.MultiDimFit.mH120.root --POI kl --others ./I/higgsCombineTest.MultiDimFit.mH120.root:scenI:4 ./II/higgsCombineTest.MultiDimFit.mH120.root:scenII:3 ./III/higgsCombineTest.MultiDimFit.mH120.root:scenIII:6 --main-label onlystat --logo FCC-hh --logo-sub "Work in progress" --y-max 15


#combine -M AsymptoticLimits combine_all.root --run blind -m 125 > output_Asym_Lim.txt
#combine -M Significance combine_all.root -t -1 --expectSignal=1 > output_Sig.txt
#combineTool.py -M Impacts -d combine_all.root -m 125 -t -1 --doInitialFit --robustFit 1 --rMax 100 --expectSignal 10
#combineTool.py -M Impacts -d combine_all.root -m 125 -t -1 --robustFit 1 --doFits --parallel 8 --rMax 100 --expectSignal 10
#combineTool.py -M Impacts -d combine_all.root -m 125 -t -1 -o impacts.json --rMax 100 --expectSignal 10
#plotImpacts.py -i impacts.json -o impacts_combined_10
#rm higgsCombine*
fi
22 changes: 20 additions & 2 deletions run/bbyy_analysis/README.md
@@ -15,7 +15,10 @@ Then run on batch by switching back `runBatch = True` (ideally this could be pas
fccanalysis run analysis_bbyy_selections_v2.py
```
This step will produce root files needed for the following.

To generate the JSON file with information on the cross section (xs), the total number of generated events, the sum of weights, etc., run
```
python create_norm_dict.py -i <directory_with_your_ntuples>
```
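
For orientation, the sketch below shows a plausible layout for that JSON and how it can be used to build per-event weights. The actual key names are whatever `create_norm_dict.py` writes, so treat these as illustrative assumptions:
```
import json

# Illustrative structure -- the real keys are defined by create_norm_dict.py.
norm = {
    "mgp8_pp_jjaa_5f": {
        "xs_pb": 1.0,            # cross section in pb (placeholder value)
        "n_generated": 1000000,  # total number of generated events
        "sum_of_weights": 998765.4,
    }
}
with open("norm_dict.json", "w") as f:
    json.dump(norm, f, indent=2)

# Per-event weight for a target integrated luminosity (in pb^-1):
lumi = 30.0e6  # 30 ab^-1, the usual FCC-hh reference luminosity
proc = norm["mgp8_pp_jjaa_5f"]
weight = lumi * proc["xs_pb"] / proc["sum_of_weights"]
```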
## Process root files
Process the root files by submitting jobs on condor with:
```
@@ -33,4 +36,19 @@ Finally, convert the root files into a Parquet file:
```
python openTree.py
```
At the end there will be a single parquet file, `df_Sel_All_haa_Mbtag.parquet`, containing all the samples.

## Notebooks: Run DNNs and define cuts and categorization
`full_3DNN.ipynb` performs the ttH-killer DNN training as well as the training of two further "global" DNNs (one for Mx > 350 GeV, the other for Mx < 350 GeV).
It saves the DNN models so they can be retrieved later by `applyDNN_SelCat_full3DNN.ipynb`.
`applyDNN_SelCat_full3DNN.ipynb` applies the DNNs to the whole dataset (to keep the notebook from crashing, run it separately on df_signal and df_bkg and then concatenate the dataframes, as sketched below). It finds the best cut for the ttH killer and the best boundaries between the high/medium-purity and central/sideband regions. Finally, it saves one dataframe per category in a parquet file.
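
A minimal sketch of that workaround, assuming a Keras model saved by the training notebook (the file names and feature columns below are illustrative placeholders, not the notebook's actual ones):
```
import pandas as pd
from tensorflow import keras

# Illustrative paths and columns -- the real ones are set in the notebooks.
model = keras.models.load_model("ttH_killer_model.h5")
feature_cols = ["m_bb", "m_yy", "pt_b1", "pt_y1"]

def apply_dnn(df):
    """Return a copy of df with the DNN score attached."""
    out = df.copy()
    out["dnn_score"] = model.predict(out[feature_cols].to_numpy(), batch_size=4096).ravel()
    return out

# Score signal and background separately to keep peak memory low,
# then concatenate the results into a single dataframe.
df_signal = apply_dnn(pd.read_parquet("df_signal.parquet"))
df_bkg = apply_dnn(pd.read_parquet("df_bkg.parquet"))
df_all = pd.concat([df_signal, df_bkg], ignore_index=True)
```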

## Create inputs for Combine
Run `create_histo_allCats.py` to open the dataframes from the previous step, extract m_gg, bin the distribution, and save it in a root file. Here the a+jj contribution is added (scaled from jj+aa) and a 'data_obs' entry is appended to the resulting root file to satisfy Combine's requirements; a rough sketch of this step follows.
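
A PyROOT sketch of that step — the histogram names match the processes used in the cards, but the file name and the a+jj scale factor are illustrative placeholders rather than the values hard-coded in `create_histo_allCats.py`:
```
import ROOT

# One such file exists per category and region; this name is illustrative.
f = ROOT.TFile.Open("great350_high_purity_sb.root", "UPDATE")

# Derive the a+jj template by scaling the jj+aa shape.
h_jjaa = f.Get("mgg_mgp8_pp_jjaa_5f")
h_jja = h_jjaa.Clone("mgg_mgp8_pp_jja_5f")
h_jja.Scale(2.0)  # placeholder scale factor, for illustration only

# Combine requires a 'data_obs' entry; in a blinded study it can be
# built from the sum of the background templates.
h_data = h_jjaa.Clone("data_obs")
h_data.Add(h_jja)

h_jja.Write("", ROOT.TObject.kOverwrite)
h_data.Write("", ROOT.TObject.kOverwrite)
f.Close()
```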

## Create card and run Combine
Run:
```
bash run_cards_allCats.sh bbgg <name_folder_you_like>
```
It will produce the cards, combine them, run the fit, and plot the NLL scan for every scenario (I, II, III); an illustrative run is sketched below.
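
For example (`test_run` is an illustrative folder name), the outputs follow the `CardWriter` pattern set in `card_maker_allCats.py`:
```
bash run_cards_allCats.sh bbgg test_run
# Cards:      LIMITS/test_run/{I,II,III}/{sb,c}/HHbbgg_<category>_0_100TeV.txt
# Workspaces: LIMITS/test_run/{I,II,III}/combine_all.root
# NLL scans:  produced by plot1DScan.py, run from LIMITS/test_run/
```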
24 changes: 15 additions & 9 deletions run/bbyy_analysis/analysis_bbyy_selections_v2.py
@@ -6,10 +6,11 @@
"pwp8_pp_hh_lambda240_5f_hhbbaa":{'chunks':500, 'output':"FCChh_EvtGen_pwp8_pp_hh_lambda240_5f_hhbbaa"},
"pwp8_pp_hh_lambda300_5f_hhbbaa":{'chunks':500, 'output':"FCChh_EvtGen_pwp8_pp_hh_lambda300_5f_hhbbaa"},
"pwp8_pp_hh_lambda000_5f_hhbbaa":{'chunks':500, 'output':"FCChh_EvtGen_pwp8_pp_hh_lambda000_5f_hhbbaa"},
"mgp8_pp_h012j_5f":{'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_h012j_5f"},
"mgp8_pp_vbf_h01j_5f":{'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_vbf_h01j_5f"},
"mgp8_pp_tth01j_5f":{'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_tth01j_5f"},
"mgp8_pp_vh012j_5f":{'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_vh012j_5f"},
#"mgp8_pp_h012j_5f_haa":{#'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_h012j_5f_haa"},
# "mgp8_pp_vbf_h01j_5f":{'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_vbf_h01j_5f"},
# "mgp8_pp_tth01j_5f_haa/events_000000143":{#'chunks':500,
#'output':"FCChh_EvtGen_mgp8_pp_tth01j_5f_haa"},
# "mgp8_pp_vh012j_5f":{'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_vh012j_5f"},
"mgp8_pp_jjaa_5f":{'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_jjaa_5f"},
# "mgp8_pp_tt012j_5f": {'chunks':500, 'output':"FCChh_EvtGen_mgp8_pp_tt012j_5f"}

@@ -34,7 +35,7 @@
inputDir = "/eos/experiment/fcc/hh/generation/DelphesEvents/fcc_v05_scenarioI/" #your directory with the input file

#Optional: output directory, default is local dir
outputDir = "/eos/user/p/pmastrap/FCCFW/Analysis/FCCAnalyses/run/Delphescard_validation/FCCAnalysis_ntuples_forAnalysis/"
outputDir = "/eos/user/p/pmastrap/FCCFW/Analysis/FCCAnalyses/run/Delphescard_validation/FCCAnalysis_ntuples_forAnalysis_Mbtag/"

#Optional: ncpus, default is 4
nCPUS = 8
@@ -73,10 +74,11 @@ def analysers(df, out_name):

#b-tagged jets:
.Alias("Jet3","Jet#3.index")

#.Alias("Jet3","_Jet_particleIDs.index")
#LOOSE WP : bit 1 = medium WP, bit 2 = tight WP
#b tagged jets
.Define("bjets", "AnalysisFCChh::get_tagged_jets(Jet, Jet3, ParticleIDs, ParticleIDs_0, 0)") #bit 0 = loose WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
#.Define("bjets", "AnalysisFCChh::get_tagged_jets(Jet, Jet3, ParticleIDs, _ParticleIDs_parameters, 0)")
.Define("bjets", "AnalysisFCChh::get_tagged_jets(Jet, Jet3, ParticleIDs, ParticleIDs_0, 1)") #bit 0 = loose WP, see: https://github.com/delphes/delphes/blob/master/cards/FCC/scenarios/FCChh_I.tcl
.Define("selpt_bjets", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(bjets)")
.Define("sel_bjets_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_bjets)")
.Define("sel_bjets", "AnalysisFCChh::SortParticleCollection(sel_bjets_unsort)")
@@ -94,6 +96,7 @@ def analysers(df, out_name):

#all isolated
.Alias("Electron0", "Electron#0.index")
#.Alias("Electron0", "Electron_objIdx.index")
.Define("ele", "FCCAnalyses::ReconstructedParticle::get(Electron0, ReconstructedParticles)")
.Define("selpt_ele", "FCCAnalyses::ReconstructedParticle::sel_pt(10.)(ele)")
.Define("sel_ele_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_ele)")
@@ -112,6 +115,7 @@

# all isolated
.Alias("Muon0", "Muon#0.index")
#.Alias("Muon0", "Muon_objIdx.index")
.Define("mu", "FCCAnalyses::ReconstructedParticle::get(Muon0, ReconstructedParticles)")
.Define("selpt_mu", "FCCAnalyses::ReconstructedParticle::sel_pt(10.)(mu)")
.Define("sel_mu_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_mu)")
@@ -131,6 +135,7 @@
#all, after isolation

.Alias("Photon0", "Photon#0.index")
#.Alias("Photon0", "Photon_objIdx.index")
.Define("gamma", "FCCAnalyses::ReconstructedParticle::get(Photon0, ReconstructedParticles)")
.Define("selpt_gamma", "FCCAnalyses::ReconstructedParticle::sel_pt(30.)(gamma)")
.Define("sel_gamma_unsort", "FCCAnalyses::ReconstructedParticle::sel_eta(4)(selpt_gamma)")
@@ -177,7 +182,8 @@

def output(out_name):
branchList = [
# Jets:
"weight",
# Jets:
"njets", "j1_e", "j1_pt", "j1_eta", "j1_phi",
"j2_e", "j2_pt", "j2_eta", "j2_phi",
# B-jets:
@@ -201,5 +207,5 @@ def output(out_name):
return branchList


# local test: fccanalysis run analysis_bbyy_selections_v2.py --nevents 100 - also switch runBatch to False
# local test: fccanalysis run analysis_noiso.py --nevents 100 - also switch runBatch to False
