diff --git a/CombineTools/bin/BuildFile.xml b/CombineTools/bin/BuildFile.xml index d143e578a85..b211ea7fafa 100644 --- a/CombineTools/bin/BuildFile.xml +++ b/CombineTools/bin/BuildFile.xml @@ -14,6 +14,7 @@ + diff --git a/CombineTools/bin/PostFit2DShapesFromWorkspace.cpp b/CombineTools/bin/PostFit2DShapesFromWorkspace.cpp new file mode 100644 index 00000000000..050ccea9646 --- /dev/null +++ b/CombineTools/bin/PostFit2DShapesFromWorkspace.cpp @@ -0,0 +1,460 @@ +#include +#include "boost/program_options.hpp" +#include "boost/format.hpp" +#include "TSystem.h" +#include "TH2F.h" +#include "CombineHarvester/CombineTools/interface/CombineHarvester.h" +#include "CombineHarvester/CombineTools/interface/ParseCombineWorkspace.h" +#include "CombineHarvester/CombineTools/interface/TFileIO.h" +#include "CombineHarvester/CombineTools/interface/Logging.h" + +namespace po = boost::program_options; + +using namespace std; + +void ReverseBins(TH2F & h) { + std::vector contents(h.GetNbinsX()*h.GetNbinsY()); + std::vector errors(h.GetNbinsX()*h.GetNbinsY()); + for (int j = 0; j < h.GetNbinsY(); ++j) { + for (int i = 0; i < h.GetNbinsX(); ++i) { + contents[j*h.GetNbinsX()+i] = h.GetBinContent(i + 1, j + 1); + errors[j*h.GetNbinsX()+i] = h.GetBinError(i + 1, j + 1); + } + } + for (int j = 0; j < h.GetNbinsY(); ++j) { + for (int i = 0; i < h.GetNbinsX(); ++i) { + h.SetBinContent(h.GetNbinsX()*h.GetNbinsY() - j*h.GetNbinsX()+i, contents[j*h.GetNbinsX()+i]); + h.SetBinError(h.GetNbinsX()*h.GetNbinsY() - j*h.GetNbinsX()+i, errors[j*h.GetNbinsX()+i]); + } + } + // return h; +} + +int main(int argc, char* argv[]) { + // Need this to read combine workspaces + gSystem->Load("libHiggsAnalysisCombinedLimit"); + + string datacard = ""; + string workspace = ""; + string fitresult = ""; + string mass = ""; + bool postfit = false; + bool sampling = false; + bool no_sampling = false; + string output = ""; + bool factors = false; + unsigned samples = 500; + std::string freeze_arg = ""; + bool covariance = false; + string data = "data_obs"; + bool skip_prefit = false; + bool skip_proc_errs = false; + bool total_shapes = false; + std::vector reverse_bins_; + + po::options_description help_config("Help"); + help_config.add_options() + ("help,h", "produce help message"); + + po::options_description config("Configuration"); + config.add_options() + ("workspace,w", + po::value(&workspace)->required(), + "The input workspace-containing file [REQUIRED]") + ("dataset", + po::value(&data)->default_value(data), + "The input dataset name") + ("datacard,d", + po::value(&datacard), + "The input datacard, only used for rebinning") + ("output,o ", + po::value(&output)->required(), + "Name of the output root file to create [REQUIRED]") + ("fitresult,f", + po::value(&fitresult)->default_value(fitresult), + "Path to a RooFitResult, only needed for postfit") + ("mass,m", + po::value(&mass)->default_value(""), + "Signal mass point of the input datacard") + ("postfit", + po::value(&postfit) + ->default_value(postfit)->implicit_value(true), + "Create post-fit histograms in addition to pre-fit") + ("sampling", + po::value(&sampling)->default_value(sampling)->implicit_value(true), + "Use the cov. matrix sampling method for the post-fit uncertainty (deprecated, this is the default)") + ("no-sampling", + po::value(&no_sampling)->default_value(no_sampling)->implicit_value(true), + "Do not use the cov. matrix sampling method for the post-fit uncertainty") + ("samples", + po::value(&samples)->default_value(samples), + "Number of samples to make in each evaluate call") + ("print", + po::value(&factors)->default_value(factors)->implicit_value(true), + "Print tables of background shifts and relative uncertainties") + ("freeze", + po::value(&freeze_arg)->default_value(freeze_arg), + "Format PARAM1,PARAM2=X,PARAM3=Y where the values X and Y are optional") + ("covariance", + po::value(&covariance)->default_value(covariance)->implicit_value(true), + "Save the covariance and correlation matrices of the process yields") + ("skip-prefit", + po::value(&skip_prefit)->default_value(skip_prefit)->implicit_value(true), + "Skip the pre-fit evaluation") + ("skip-proc-errs", + po::value(&skip_proc_errs)->default_value(skip_proc_errs)->implicit_value(true), + "Skip evaluation of errors on individual processes") + ("total-shapes", + po::value(&total_shapes)->default_value(total_shapes)->implicit_value(true), + "Save signal- and background shapes added for all channels/categories") + ("reverse-bins", po::value>(&reverse_bins_)->multitoken(), "List of bins to reverse the order for"); + + po::variables_map vm; + + // First check if the user has set the "--help" or "-h" option, and if so + // just prin the usage information and quit + po::store(po::command_line_parser(argc, argv) + .options(help_config).allow_unregistered().run(), vm); + po::notify(vm); + if (vm.count("help")) { + cout << config << "\nExample usage:\n"; + cout << "PostFit2DShapesFromWorkspace.root -d htt_mt_125.txt -w htt_mt_125.root -o htt_mt_125_shapes.root -m 125 " + "-f mlfit.root:fit_s --postfit --print\n"; + return 1; + } + + // Parse the main config options + po::store(po::command_line_parser(argc, argv).options(config).run(), vm); + po::notify(vm); + + if (sampling) { + std::cout<<"WARNING: the default behaviour of PostFitShapesFromWorkspace is to use the covariance matrix sampling method for the post-fit uncertainty. The option --sampling is deprecated and will be removed in future versions of CombineHarvester"<(gDirectory->Get("w")); + + if (!ws) { + throw std::runtime_error( + FNERROR("Could not locate workspace in input file")); + } + + // Create CH instance and parse the workspace + ch::CombineHarvester cmb; + cmb.SetFlag("workspaces-use-clone", true); + ch::ParseCombineWorkspace(cmb, *ws, "ModelConfig", data, false); + + // Only evaluate in case parameters to freeze are provided + if(! freeze_arg.empty()) + { + vector freeze_vec; + boost::split(freeze_vec, freeze_arg, boost::is_any_of(",")); + for (auto const& item : freeze_vec) { + vector parts; + boost::split(parts, item, boost::is_any_of("=")); + if (parts.size() == 1) { + ch::Parameter *par = cmb.GetParameter(parts[0]); + if (par) par->set_frozen(true); + else throw std::runtime_error( + FNERROR("Requested variable to freeze does not exist in workspace")); + } else { + if (parts.size() == 2) { + ch::Parameter *par = cmb.GetParameter(parts[0]); + if (par) { + par->set_val(boost::lexical_cast(parts[1])); + par->set_frozen(true); + } + else throw std::runtime_error( + FNERROR("Requested variable to freeze does not exist in workspace")); + } + } + } + } + // cmb.GetParameter("r")->set_frozen(true); + + ch::CombineHarvester cmb_card; + cmb_card.SetFlag("workspaces-use-clone",true); + if (datacard != "") { + cmb_card.ParseDatacard(datacard, "", "", "", 0, mass); + } + + // Drop any process that has no hist/data/pdf + cmb.FilterProcs([&](ch::Process * proc) { + bool no_shape = !proc->shape() && !proc->data() && !proc->pdf(); + if (no_shape) { + cout << "Filtering process with no shape:\n"; + cout << ch::Process::PrintHeader << *proc << "\n"; + } + return no_shape; + }); + + auto bins = cmb.cp().bin_set(); + + TFile outfile(output.c_str(), "RECREATE"); + TH1::AddDirectory(false); + + // Create a map of maps for storing histograms in the form: + // pre_shapes[][] + map> pre_shapes; + + // Also create a simple map for storing total histograms, summed + // over all bins, in the form: + // pre_shapes_tot[] + map pre_shapes_tot; + + // We can always do the prefit version, + // Loop through the bins writing the shapes to the output file + if (!skip_prefit) { + if(total_shapes){ + pre_shapes_tot["data_obs"] = cmb.GetObserved2DShape(); + // Then fill total signal and total bkg hists + std::cout << ">> Doing prefit: TotalBkg" << std::endl; + pre_shapes_tot["TotalBkg"] = + cmb.cp().backgrounds().Get2DShapeWithUncertainty(); + std::cout << ">> Doing prefit: TotalSig" << std::endl; + pre_shapes_tot["TotalSig"] = + cmb.cp().signals().Get2DShapeWithUncertainty(); + std::cout << ">> Doing prefit: TotalProcs" << std::endl; + pre_shapes_tot["TotalProcs"] = + cmb.cp().Get2DShapeWithUncertainty(); + + if (datacard != "") { + TH2F ref = cmb_card.cp().GetObserved2DShape(); + for (auto & it : pre_shapes_tot) { + it.second = ch::RestoreBinning2D(it.second, ref); + } + } + + // Can write these straight into the output file + outfile.cd(); + for (auto& iter : pre_shapes_tot) { + ch::WriteToTFile(&(iter.second), &outfile, "prefit/" + iter.first); + } + } + for (auto bin : bins) { + ch::CombineHarvester cmb_bin = cmb.cp().bin({bin}); + // This next line is a temporary fix for models with parameteric RooFit pdfs + // - we try and set the number of bins to evaluate the pdf to be the same as + // the number of bins in data + // cmb_bin.SetPdfBins(cmb_bin.GetObservedShape().GetNbinsX()); + + // Fill the data and process histograms + pre_shapes[bin]["data_obs"] = cmb_bin.GetObserved2DShape(); + for (auto proc : cmb_bin.process_set()) { + std::cout << ">> Doing prefit: " << bin << "," << proc << std::endl; + if (skip_proc_errs) { + pre_shapes[bin][proc] = + cmb_bin.cp().process({proc}).Get2DShape(); + } else { + pre_shapes[bin][proc] = + cmb_bin.cp().process({proc}).Get2DShapeWithUncertainty(); + } + } + + // The fill total signal and total bkg hists + std::cout << ">> Doing prefit: " << bin << "," << "TotalBkg" << std::endl; + pre_shapes[bin]["TotalBkg"] = + cmb_bin.cp().backgrounds().Get2DShapeWithUncertainty(); + std::cout << ">> Doing prefit: " << bin << "," << "TotalSig" << std::endl; + pre_shapes[bin]["TotalSig"] = + cmb_bin.cp().signals().Get2DShapeWithUncertainty(); + std::cout << ">> Doing prefit: " << bin << "," << "TotalProcs" << std::endl; + pre_shapes[bin]["TotalProcs"] = + cmb_bin.cp().Get2DShapeWithUncertainty(); + + + if (datacard != "") { + TH2F ref = cmb_card.cp().bin({bin}).GetObserved2DShape(); + for (auto & it : pre_shapes[bin]) { + it.second = ch::RestoreBinning2D(it.second, ref); + } + } + + for (auto const& rbin : reverse_bins_) { + if (rbin != bin) continue; + auto & hists = pre_shapes[bin]; + for (auto it = hists.begin(); it != hists.end(); ++it) { + ReverseBins(it->second); + } + } + // Can write these straight into the output file + outfile.cd(); + for (auto& iter : pre_shapes[bin]) { + ch::WriteToTFile(&(iter.second), &outfile, bin + "_prefit/" + iter.first); + } + } + + // Print out the relative uncert. on the bkg + if (factors) { + cout << boost::format("%-25s %-32s\n") % "Bin" % + "Total relative bkg uncert. (prefit)"; + cout << string(58, '-') << "\n"; + for (auto bin : bins) { + ch::CombineHarvester cmb_bin = cmb.cp().bin({bin}); + double rate = cmb_bin.cp().backgrounds().GetRate(); + double err = cmb_bin.cp().backgrounds().GetUncertainty(); + cout << boost::format("%-25s %-10.5f") % bin % + (rate > 0. ? (err / rate) : 0.) << std::endl; + } + } + } + + + // Now we can do the same again but for the post-fit model + if (postfit) { + // Get the fit result and update the parameters to the post-fit model + RooFitResult res = ch::OpenFromTFile(fitresult); + cmb.UpdateParameters(res); + + // Calculate the post-fit fractional background uncertainty in each bin + + map> post_shapes; + map post_yield_cov; + map post_yield_cor; + + map post_shapes_tot; + + if(total_shapes){ + post_shapes_tot["data_obs"] = cmb.GetObserved2DShape(); + // Fill the total sig. and total bkg. hists + auto cmb_bkgs = cmb.cp().backgrounds(); + auto cmb_sigs = cmb.cp().signals(); + std::cout << ">> Doing postfit: TotalBkg" << std::endl; + post_shapes_tot["TotalBkg"] = + no_sampling ? cmb_bkgs.Get2DShapeWithUncertainty() + : cmb_bkgs.Get2DShapeWithUncertainty(res, samples); + std::cout << ">> Doing postfit: TotalSig" << std::endl; + post_shapes_tot["TotalSig"] = + no_sampling ? cmb_sigs.Get2DShapeWithUncertainty() + : cmb_sigs.Get2DShapeWithUncertainty(res, samples); + std::cout << ">> Doing postfit: TotalProcs" << std::endl; + post_shapes_tot["TotalProcs"] = + no_sampling ? cmb.cp().Get2DShapeWithUncertainty() + : cmb.cp().Get2DShapeWithUncertainty(res, samples); + + if (datacard != "") { + TH2F ref = cmb_card.cp().GetObserved2DShape(); + for (auto & it : post_shapes_tot) { + it.second = ch::RestoreBinning2D(it.second, ref); + } + } + + outfile.cd(); + // Write the post-fit histograms + for (auto & iter : post_shapes_tot) { + ch::WriteToTFile(&(iter.second), &outfile, + "postfit/" + iter.first); + } + } + + + for (auto bin : bins) { + ch::CombineHarvester cmb_bin = cmb.cp().bin({bin}); + post_shapes[bin]["data_obs"] = cmb_bin.GetObserved2DShape(); + for (auto proc : cmb_bin.process_set()) { + auto cmb_proc = cmb_bin.cp().process({proc}); + // Method to get the shape uncertainty depends on whether we are using + // the sampling method or the "wrong" method (assumes no correlations) + std::cout << ">> Doing postfit: " << bin << "," << proc << std::endl; + if (skip_proc_errs) { + post_shapes[bin][proc] = cmb_proc.Get2DShape(); + } else { + post_shapes[bin][proc] = + no_sampling ? cmb_proc.Get2DShapeWithUncertainty() + : cmb_proc.Get2DShapeWithUncertainty(res, samples); + } + } + if (!no_sampling && covariance) { + post_yield_cov[bin] = cmb_bin.GetRateCovariance(res, samples); + post_yield_cor[bin] = cmb_bin.GetRateCorrelation(res, samples); + } + // Fill the total sig. and total bkg. hists + auto cmb_bkgs = cmb_bin.cp().backgrounds(); + auto cmb_sigs = cmb_bin.cp().signals(); + std::cout << ">> Doing postfit: " << bin << "," << "TotalBkg" << std::endl; + post_shapes[bin]["TotalBkg"] = + no_sampling ? cmb_bkgs.Get2DShapeWithUncertainty() + : cmb_bkgs.Get2DShapeWithUncertainty(res, samples); + std::cout << ">> Doing postfit: " << bin << "," << "TotalSig" << std::endl; + post_shapes[bin]["TotalSig"] = + no_sampling ? cmb_sigs.Get2DShapeWithUncertainty() + : cmb_sigs.Get2DShapeWithUncertainty(res, samples); + std::cout << ">> Doing postfit: " << bin << "," << "TotalProcs" << std::endl; + post_shapes[bin]["TotalProcs"] = + no_sampling ? cmb_bin.cp().Get2DShapeWithUncertainty() + : cmb_bin.cp().Get2DShapeWithUncertainty(res, samples); + + if (datacard != "") { + TH2F ref = cmb_card.cp().bin({bin}).GetObserved2DShape(); + for (auto & it : post_shapes[bin]) { + it.second = ch::RestoreBinning2D(it.second, ref); + } + } + + outfile.cd(); + // Write the post-fit histograms + + for (auto const& rbin : reverse_bins_) { + if (rbin != bin) continue; + std::cout << ">> reversing hists in bin " << bin << "\n"; + auto & hists = post_shapes[bin]; + for (auto it = hists.begin(); it != hists.end(); ++it) { + ReverseBins(it->second); + } + } + + for (auto & iter : post_shapes[bin]) { + ch::WriteToTFile(&(iter.second), &outfile, + bin + "_postfit/" + iter.first); + } + for (auto & iter : post_yield_cov) { + ch::WriteToTFile(&(iter.second), &outfile, + iter.first+"_cov"); + } + for (auto & iter : post_yield_cor) { + ch::WriteToTFile(&(iter.second), &outfile, + iter.first+"_cor"); + } + + } + + if (factors) { + cout << boost::format("\n%-25s %-32s\n") % "Bin" % + "Total relative bkg uncert. (postfit)"; + cout << string(58, '-') << "\n"; + for (auto bin : bins) { + ch::CombineHarvester cmb_bkgs = cmb.cp().bin({bin}).backgrounds(); + double rate = cmb_bkgs.GetRate(); + double err = no_sampling ? cmb_bkgs.GetUncertainty() + : cmb_bkgs.GetUncertainty(res, samples); + cout << boost::format("%-25s %-10.5f") % bin % + (rate > 0. ? (err / rate) : 0.) << std::endl; + } + } + + // As we calculate the post-fit yields can also print out the post/pre scale + // factors + if (factors && postfit) { + cout << boost::format("\n%-25s %-20s %-10s\n") % "Bin" % "Process" % + "Scale factor"; + cout << string(58, '-') << "\n"; + for (auto bin : bins) { + ch::CombineHarvester cmb_bin = cmb.cp().bin({bin}); + + for (auto proc : cmb_bin.process_set()) { + // Print out the post/pre scale factors + TH1 const& pre = pre_shapes[bin][proc]; + TH1 const& post = post_shapes[bin][proc]; + cout << boost::format("%-25s %-20s %-10.5f\n") % bin % proc % + (pre.Integral() > 0. ? (post.Integral() / pre.Integral()) + : 1.0); + } + } + } + } + // And we're done! + outfile.Close(); + return 0; +} + diff --git a/CombineTools/interface/CombineHarvester.h b/CombineTools/interface/CombineHarvester.h index 3e400906d34..4aa704a62aa 100644 --- a/CombineTools/interface/CombineHarvester.h +++ b/CombineTools/interface/CombineHarvester.h @@ -20,7 +20,7 @@ #include "CombineHarvester/CombineTools/interface/Observation.h" #include "CombineHarvester/CombineTools/interface/Utilities.h" #include "CombineHarvester/CombineTools/interface/HistMapping.h" - +#include "HiggsAnalysis/CombinedLimit/interface/VerticalInterpPdf.h" namespace ch { @@ -276,6 +276,7 @@ class CombineHarvester { void VariableRebin(std::vector bins); void ZeroBins(double min, double max); void SetPdfBins(unsigned nbins); + void Set2DPdfBins(unsigned nbinsx, unsigned nbinsy); // double getParFromWs(const std::string name); @@ -354,6 +355,8 @@ class CombineHarvester { TH1F GetShape(); TH1F GetShapeWithUncertainty(); + TH2F Get2DShape(); + TH2F Get2DShapeWithUncertainty(); /** * Sum all Process shapes and evaluate bin-wise uncertainty by sampling from * the fit convariance matrix @@ -367,6 +370,10 @@ class CombineHarvester { TH1F GetShapeWithUncertainty(RooFitResult const& fit, unsigned n_samples); TH1F GetObservedShape(); + TH2F Get2DShapeWithUncertainty(RooFitResult const* fit, unsigned n_samples); + TH2F Get2DShapeWithUncertainty(RooFitResult const& fit, unsigned n_samples); + TH2F GetObserved2DShape(); + TH2F GetRateCovariance(RooFitResult const& fit, unsigned n_samples); TH2F GetRateCorrelation(RooFitResult const& fit, unsigned n_samples); /**@}*/ @@ -574,6 +581,9 @@ void FillHistMappings(std::vector & mappings); TH1F GetShapeInternal(ProcSystMap const& lookup, std::string const& single_sys = ""); + TH2F Get2DShapeInternal(ProcSystMap const& lookup, + std::string const& single_sys = ""); + inline double smoothStepFunc(double x) const { if (std::fabs(x) >= 1.0/*_smoothRegion*/) return x > 0 ? +1 : -1; double xnorm = x/1.0;/*_smoothRegion*/ @@ -590,6 +600,10 @@ void FillHistMappings(std::vector & mappings); // bug fix for RooConstVar compatibility between ROOT626 and workspace created with earlier versions RooWorkspace* fixRooConstVar(RooWorkspace *win, bool useRooRealVar=true, bool clean=true); + void ShapeDiff2D(double x, TH2F* target, TH1 const* nom, TH1 const* low, + TH1 const* high, bool linear); + void ShapeDiff2D(double x, TH2F* target, RooDataHist const* nom, + RooDataHist const* low, RooDataHist const* high); }; diff --git a/CombineTools/interface/Observation.h b/CombineTools/interface/Observation.h index 15d787e9837..a204c6bd512 100644 --- a/CombineTools/interface/Observation.h +++ b/CombineTools/interface/Observation.h @@ -3,6 +3,7 @@ #include #include #include "TH1.h" +#include "TH2F.h" #include "RooAbsData.h" #include "CombineHarvester/CombineTools/interface/MakeUnique.h" #include "CombineHarvester/CombineTools/interface/Object.h" @@ -30,6 +31,7 @@ class Observation : public Object { std::unique_ptr ClonedScaledShape() const; TH1F ShapeAsTH1F() const; + TH2F ShapeAsTH2F() const; void set_data(RooAbsData* data) { data_ = data; } RooAbsData const* data() const { return data_; } diff --git a/CombineTools/interface/Process.h b/CombineTools/interface/Process.h index a42c059be5d..c734cec30b2 100644 --- a/CombineTools/interface/Process.h +++ b/CombineTools/interface/Process.h @@ -3,6 +3,7 @@ #include #include #include "TH1.h" +#include "TH2F.h" #include "RooAbsPdf.h" #include "RooAbsReal.h" #include "RooAbsData.h" @@ -55,6 +56,7 @@ class Process : public Object { std::unique_ptr ClonedScaledShape() const; TH1F ShapeAsTH1F() const; + TH2F ShapeAsTH2F() const; void set_pdf(RooAbsReal* pdf) { pdf_ = pdf; } RooAbsReal const* pdf() const { return pdf_; } @@ -68,6 +70,9 @@ class Process : public Object { void set_observable(RooRealVar* obs) { cached_obs_ = obs; } RooRealVar * observable() const { return cached_obs_; } + void set_observable_y(RooRealVar* obs) { cached_obsy_ = obs; } + RooRealVar * observable_y() const { return cached_obsy_; } + std::string to_string() const; friend std::ostream& operator<< (std::ostream &out, Process const& val); static std::ostream& PrintHeader(std::ostream &out); @@ -79,6 +84,7 @@ class Process : public Object { RooAbsData* data_; RooAbsReal* norm_; RooRealVar* cached_obs_; + RooRealVar* cached_obsy_; mutable RooAbsReal* cached_int_; friend void swap(Process& first, Process& second); diff --git a/CombineTools/interface/Utilities.h b/CombineTools/interface/Utilities.h index 1b5a134525e..dd180f82c50 100644 --- a/CombineTools/interface/Utilities.h +++ b/CombineTools/interface/Utilities.h @@ -108,6 +108,7 @@ RooDataHist TH1F2Data(TH1F const& hist, RooRealVar const& x, TH1F RebinHist(TH1F const& hist); TH1F RestoreBinning(TH1F const& src, TH1F const& ref); +TH2F RestoreBinning2D(TH2F const& src, TH2F const& ref); std::vector> GenerateCombinations( diff --git a/CombineTools/src/CombineHarvester.cc b/CombineTools/src/CombineHarvester.cc index bf9beebd3ee..de4a74c9ff2 100644 --- a/CombineTools/src/CombineHarvester.cc +++ b/CombineTools/src/CombineHarvester.cc @@ -476,6 +476,7 @@ void CombineHarvester::LoadShapes(Process* entry, // Post-condition #4 // Import any paramters of the RooAbsPdf and the RooRealVar RooAbsData const* data_obj = FindMatchingData(entry); + bool twoD_flag = false; if (data_obj) { if (verbosity_ >= 2) LOGLINE(log(), "Matching RooAbsData has been found"); if (pdf&&!data) { @@ -483,9 +484,25 @@ void CombineHarvester::LoadShapes(Process* entry, ImportParameters(&argset); if (!entry->observable()) { std::string var_name; - if (data_obj) var_name = data_obj->get()->first()->GetName(); + std::string var_name_y; + + if (data_obj) { + var_name = data_obj->get()->first()->GetName(); + RooArgSet* temp_vars = (RooArgSet*)data_obj->get()->Clone(); + twoD_flag = temp_vars->getSize()>1; + if(twoD_flag){ + RooAbsArg* temp_xvar = data_obj->get()->first(); + temp_vars->remove(*temp_xvar,true,true); + var_name_y = temp_vars->first()->GetName(); + } + delete temp_vars; + } + // if (data_obj) var_name = data_obj->get()->first()->GetName(); entry->set_observable( (RooRealVar*)entry->pdf()->findServer(var_name.c_str())); + if(twoD_flag) + entry->set_observable_y( + (RooRealVar*)entry->pdf()->findServer(var_name_y.c_str())); } } if (norm) { @@ -576,7 +593,6 @@ void CombineHarvester::LoadShapes(Systematic* entry, } else if (mapping.IsPdf()) { if (verbosity_ >= 2) LOGLINE(log(), "Mapping type is RooDataHist"); // Try and get this as RooAbsData first. If this doesn't work try pdf - RooDataHist* h = (mapping.sys_ws) ? dynamic_cast(mapping.sys_ws->data(mapping.WorkspaceObj().c_str())) : nullptr; RooDataHist* h_u = diff --git a/CombineTools/src/CombineHarvester_Evaluate.cc b/CombineTools/src/CombineHarvester_Evaluate.cc index ef4bb5ff377..8583045f821 100644 --- a/CombineTools/src/CombineHarvester_Evaluate.cc +++ b/CombineTools/src/CombineHarvester_Evaluate.cc @@ -186,6 +186,92 @@ TH1F CombineHarvester::GetShapeWithUncertainty(RooFitResult const& fit, return shape; } +TH2F CombineHarvester::Get2DShapeWithUncertainty() { + auto lookup = GenerateProcSystMap(); + TH2F shape = Get2DShape(); + for (int i = 1; i <= shape.GetNbinsX(); ++i) { + for (int j = 1; j <= shape.GetNbinsY(); ++j) { + shape.SetBinError(i,j, 0.0); + } + } + for (auto param_it : params_) { + double backup = param_it.second->val(); + param_it.second->set_val(backup+param_it.second->err_d()); + TH2F shape_d = this->Get2DShapeInternal(lookup, param_it.first); + param_it.second->set_val(backup+param_it.second->err_u()); + TH2F shape_u = this->Get2DShapeInternal(lookup, param_it.first); + for (int i = 1; i <= shape.GetNbinsX(); ++i) { + for (int j = 1; j <= shape.GetNbinsY(); ++j) { + double err = std::fabs(shape_u.GetBinContent(i,j) - shape_d.GetBinContent(i,j)) / 2.0; + shape.SetBinError(i,j, err*err + shape.GetBinError(i,j)); + } + } + param_it.second->set_val(backup); + } + for (int i = 1; i <= shape.GetNbinsX(); ++i) { + for (int j = 1; j <= shape.GetNbinsY(); ++j) { + shape.SetBinError(i,j, std::sqrt(shape.GetBinError(i,j))); + } + } + return shape; +} + +TH2F CombineHarvester::Get2DShapeWithUncertainty(RooFitResult const* fit, + unsigned n_samples) { + return Get2DShapeWithUncertainty(*fit, n_samples); +} + +TH2F CombineHarvester::Get2DShapeWithUncertainty(RooFitResult const& fit, + unsigned n_samples) { + auto lookup = GenerateProcSystMap(); + TH2F shape = Get2DShapeInternal(lookup); + for (int i = 1; i <= shape.GetNbinsX(); ++i) { + for (int j = 1; j <= shape.GetNbinsY(); ++j) { + shape.SetBinError(i,j, 0.0); + } + } + // Create a backup copy of the current parameter values + auto backup = GetParameters(); + + // Calling randomizePars() ensures that the RooArgList of sampled parameters + // is already created within the RooFitResult + RooArgList const& rands = fit.randomizePars(); + + // Now create two aligned vectors of the RooRealVar parameters and the + // corresponding ch::Parameter pointers + int n_pars = rands.getSize(); + std::vector r_vec(n_pars, nullptr); + std::vector p_vec(n_pars, nullptr); + for (unsigned n = 0; n < p_vec.size(); ++n) { + r_vec[n] = dynamic_cast(rands.at(n)); + p_vec[n] = GetParameter(r_vec[n]->GetName()); + } + + // Main loop through n_samples + for (unsigned i = 0; i < n_samples; ++i) { + // Randomise and update values + fit.randomizePars(); + for (int n = 0; n < n_pars; ++n) { + if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal()); + } + + TH2F rand_shape = this->Get2DShapeInternal(lookup); + for (int i = 1; i <= shape.GetNbinsX(); ++i) { + for (int j = 1; j <= shape.GetNbinsY(); ++j) { + double err = std::fabs(rand_shape.GetBinContent(i,j) - shape.GetBinContent(i,j)); + shape.SetBinError(i,j, err*err + shape.GetBinError(i,j)); + } + } + } + for (int i = 1; i <= shape.GetNbinsX(); ++i) { + for (int j = 1; j <= shape.GetNbinsY(); ++j) { + shape.SetBinError(i,j, std::sqrt(shape.GetBinError(i,j)/double(n_samples))); + } + } + this->UpdateParameters(backup); + return shape; +} + TH2F CombineHarvester::GetRateCovariance(RooFitResult const& fit, unsigned n_samples) { auto lookup = GenerateProcSystMap(); @@ -432,6 +518,155 @@ TH1F CombineHarvester::GetShapeInternal(ProcSystMap const& lookup, return shape; } +TH2F CombineHarvester::Get2DShape() { + auto lookup = GenerateProcSystMap(); + return Get2DShapeInternal(lookup); +} + +TH2F CombineHarvester::Get2DShapeInternal(ProcSystMap const& lookup, + std::string const& single_sys) { + TH2F shape; + bool shape_init = false; + + for (unsigned i = 0; i < procs_.size(); ++i) { + // Might be able to skip if only interested in one nuisance + // However - we can't skip if the process has a pdf, as + // we haven't checked what the parameters are + if (single_sys != "" && !procs_[i]->pdf()) { + if (!ch::any_of(lookup[i], [&](Systematic const* sys) { + return sys->name() == single_sys; + })) continue; + } + double p_rate = procs_[i]->rate(); + if (procs_[i]->shape() || procs_[i]->data()) { + TH2F proc_shape = procs_[i]->ShapeAsTH2F(); + for (auto sys_it : lookup[i]) { + if (sys_it->type() == "rateParam") { + continue; // don't evaluate this for now + } + auto param_it = params_.find(sys_it->name()); + if (param_it == params_.end()) { + throw std::runtime_error( + FNERROR("Parameter " + sys_it->name() + + " not found in CombineHarvester instance")); + } + double x = param_it->second->val(); + if (sys_it->asymm()) { + p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(), + sys_it->value_u()); + if (sys_it->type() == "shape" || sys_it->type() == "shapeN2" || + sys_it->type() == "shapeU") { + bool linear = true; + if (sys_it->type() == "shapeN2") linear = false; + if (sys_it->shape_u() && sys_it->shape_d()) { + ShapeDiff2D(x * sys_it->scale(), &proc_shape, procs_[i]->shape(), + sys_it->shape_d(), sys_it->shape_u(), linear); + } + if (sys_it->data_u() && sys_it->data_d()) { + RooDataHist const* nom = + dynamic_cast(procs_[i]->data()); + if (nom) { + ShapeDiff2D(x * sys_it->scale(), &proc_shape, nom, + sys_it->data_d(), sys_it->data_u()); + } + } + } + } else { + p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale()); + } + } + for (int bx = 1; bx <= proc_shape.GetNbinsX(); ++bx) { + for (int by = 1; by <= proc_shape.GetNbinsY(); ++by) { + if (proc_shape.GetBinContent(bx,by) < 0.) proc_shape.SetBinContent(bx,by, 0.); + } + } + proc_shape.Scale(p_rate); + if (!shape_init) { + proc_shape.Copy(shape); + shape.Reset(); + shape_init = true; + } + shape.Add(&proc_shape); + } else if (procs_[i]->pdf()) { + if (!procs_[i]->observable()) { + bool twoD_flag = false; + RooAbsData const* data_obj = FindMatchingData(procs_[i].get()); + std::string var_name = "CMS_th1x"; + std::string var_name_y = "CMS_th1y"; + if (data_obj) { + var_name = data_obj->get()->first()->GetName(); + RooArgSet* temp_vars = (RooArgSet*)data_obj->get()->Clone(); + twoD_flag = temp_vars->getSize()>1; + std::cout<<"2D flag in harvester_evaluate: "<get()->first(); + temp_vars->remove(*temp_xvar,true,true); + twoD_flag = temp_vars->getSize()>1; + var_name_y = temp_vars->first()->GetName(); + } + //delete temp_vars; + } + if (procs_[i]->pdf()->findServer(var_name.c_str())) { + procs_[i]->set_observable((RooRealVar *)procs_[i]->pdf()->findServer(var_name.c_str())); + if(twoD_flag){ + procs_[i]->set_observable_y((RooRealVar *)procs_[i]->pdf()->findServer(var_name_y.c_str())); + } + } + else { + VerticalInterpPdf* proc_pdf = (VerticalInterpPdf*)procs_[i]->pdf(); + auto nom_template = proc_pdf->funcList().at(0); + procs_[i]->set_observable((RooRealVar *)nom_template->findServer(var_name.c_str())); + if(twoD_flag){ + procs_[i]->set_observable_y((RooRealVar *)nom_template->findServer(var_name_y.c_str())); + } + } + } + + TH1::AddDirectory(false); + TH2F* tmp = (TH2F*)procs_[i]->observable()->createHistogram("",RooFit::Binning(procs_[i]->observable()->getBinning()), + RooFit::YVar(*(RooAbsRealLValue*)procs_[i]->observable_y(),RooFit::Binning(procs_[i]->observable_y()->getBinning())) + ); + for (int bx = 1; bx <= tmp->GetNbinsX(); ++bx) { + for (int by = 1; by <= tmp->GetNbinsY(); ++by) { + procs_[i]->observable()->setVal(tmp->GetXaxis()->GetBinCenter(bx)); + procs_[i]->observable_y()->setVal(tmp->GetYaxis()->GetBinCenter(by)); + tmp->SetBinContent(bx,by, tmp->GetXaxis()->GetBinWidth(bx) * tmp->GetYaxis()->GetBinWidth(by) * procs_[i]->pdf()->getVal()); + } + } + TH2F proc_shape = *tmp; + delete tmp; + RooAbsPdf const* aspdf = dynamic_cast(procs_[i]->pdf()); + if ((aspdf && !aspdf->selfNormalized()) || (!aspdf)) { + // LOGLINE(log(), "Have a pdf that is not selfNormalized"); + // std::cout << "Integral: " << proc_shape.Integral() << "\n"; + if (proc_shape.Integral() > 0.) { + proc_shape.Scale(1. / proc_shape.Integral()); + } + } + for (auto sys_it : lookup[i]) { + if (sys_it->type() == "rateParam") { + continue; // don't evaluate this for now + } + double x = params_[sys_it->name()]->val(); + if (sys_it->asymm()) { + p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(), + sys_it->value_u()); + } else { + p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale()); + } + } + proc_shape.Scale(p_rate); + if (!shape_init) { + proc_shape.Copy(shape); + shape.Reset(); + shape_init = true; + } + shape.Add(&proc_shape); + } + } + return shape; +} + double CombineHarvester::GetObservedRate() { double rate = 0.0; for (unsigned i = 0; i < obs_.size(); ++i) { @@ -469,6 +704,40 @@ TH1F CombineHarvester::GetObservedShape() { return shape; } +TH2F CombineHarvester::GetObserved2DShape() { + TH2F shape; + bool shape_init = false; + + for (unsigned i = 0; i < obs_.size(); ++i) { + TH2F proc_shape; + double p_rate = obs_[i]->rate(); + if (obs_[i]->shape()) { + proc_shape = obs_[i]->ShapeAsTH2F(); + } else if (obs_[i]->data()) { + RooArgSet* temp_vars = (RooArgSet*)obs_[i]->data()->get()->Clone(); + RooRealVar temp_xvar = *(RooRealVar*)temp_vars->first(); + temp_vars->remove(temp_xvar,true,true); + RooRealVar temp_yvar = *(RooRealVar*)temp_vars->first(); + TH2F* tmp = dynamic_cast(obs_[i]->data()->createHistogram( + "", temp_xvar,RooFit::YVar(temp_yvar))); // This is probably wrong + tmp->Sumw2(false); + tmp->SetBinErrorOption(TH1::kPoisson); + proc_shape = *tmp; + //delete &tmp; + proc_shape.Scale(1. / proc_shape.Integral()); + delete temp_vars; + } + proc_shape.Scale(p_rate); + if (!shape_init) { + proc_shape.Copy(shape); + shape.Reset(); + shape_init = true; + } + shape.Add(&proc_shape); + } + return shape; +} + void CombineHarvester::ShapeDiff(double x, TH1F * target, TH1 const* nom, @@ -522,6 +791,59 @@ void CombineHarvester::ShapeDiff(double x, // } // } +void CombineHarvester::ShapeDiff2D(double x, + TH2F * target, + TH1 const* nom, + TH1 const* low, + TH1 const* high, + bool linear) { + double fx = smoothStepFunc(x); + for (int i = 1; i <= target->GetNbinsX(); ++i) { + for (int j = 1; j <= target->GetNbinsY(); ++j) { + float h = high->GetBinContent(i,j); + float l = low->GetBinContent(i,j); + float n = nom->GetBinContent(i,j); + if (!linear) { + float t = target->GetBinContent(i,j); + target->SetBinContent(i,j, t > 0. ? std::log(t) : -999.); + h = (h > 0. && n > 0.) ? std::log(h/n) : 0.; + l = (l > 0. && n > 0.) ? std::log(l/n) : 0.; + target->SetBinContent(i,j, target->GetBinContent(i,j) + + 0.5 * x * ((h - l) + (h + l) * fx)); + target->SetBinContent(i,j, std::exp(target->GetBinContent(i,j))); + } else { + target->SetBinContent(i,j, target->GetBinContent(i,j) + + 0.5 * x * ((h - l) + (h + l - 2. * n) * fx)); + } + } + } +} + +void CombineHarvester::ShapeDiff2D(double x, + TH2F * target, + RooDataHist const* nom, + RooDataHist const* low, + RooDataHist const* high) { + double fx = smoothStepFunc(x); + int k = 0; + for (int i = 1; i <= target->GetNbinsX(); ++i) { + for (int j = 1; j <= target->GetNbinsY(); ++j) { + high->get(k); + low->get(k); + nom->get(k); + // The RooDataHists are not scaled to unity (unlike in the TH1 case above) + // so we have to normalise the bin contents on the fly + float h = high->weight() / high->sumEntries(); + float l = low->weight() / low->sumEntries(); + float n = nom->weight() / nom->sumEntries(); + target->SetBinContent(i,j, target->GetBinContent(i,j) + + 0.5 * x * ((h - l) + (h + l - 2. * n) * fx)); + ++k; + } + } +} + + void CombineHarvester::RenameParameter(std::string const& oldname, std::string const& newname) { auto it = params_.find(oldname); @@ -758,6 +1080,40 @@ void CombineHarvester::SetPdfBins(unsigned nbins) { } } +void CombineHarvester::Set2DPdfBins(unsigned nbinsx, unsigned nbinsy) { + for (unsigned i = 0; i < procs_.size(); ++i) { + std::set binning_varsx; + std::set binning_varsy; + if (procs_[i]->pdf()) { + RooAbsData const* data_obj = FindMatchingData(procs_[i].get()); + std::string var_name = "CMS_th1x"; + std::string var_name_y = "CMS_th1y"; + if (data_obj) { + var_name = data_obj->get()->first()->GetName(); + RooArgSet* temp_vars = (RooArgSet*)data_obj->get()->Clone(); + RooRealVar temp_xvar = *(RooRealVar*)temp_vars->first(); + temp_vars->remove(temp_xvar,true,true); + var_name_y = temp_vars->first()->GetName(); + delete temp_vars; + } + binning_varsx.insert(var_name); + binning_varsy.insert(var_name_y); + } + for (auto & it : wspaces_) { + for (auto & var : binning_varsx) { + RooRealVar* avar = + dynamic_cast(it.second->var(var.c_str())); + if (avar) avar->setBins(nbinsx); + } + for (auto & var : binning_varsy) { + RooRealVar* avar = + dynamic_cast(it.second->var(var.c_str())); + if (avar) avar->setBins(nbinsy); + } + } + } +} + // This implementation "borowed" from // HiggsAnalysis/CombinedLimit/src/ProcessNormalization.cc double CombineHarvester::logKappaForX(double x, double k_low, diff --git a/CombineTools/src/Observation.cc b/CombineTools/src/Observation.cc index 9f33817d114..456867b95c6 100644 --- a/CombineTools/src/Observation.cc +++ b/CombineTools/src/Observation.cc @@ -125,6 +125,26 @@ TH1F Observation::ShapeAsTH1F() const { return res; } +TH2F Observation::ShapeAsTH2F() const { + if (!shape_) { + throw std::runtime_error( + FNERROR("Observation object does not contain a shape")); + } + TH2F res; + // Need to get the shape as a concrete type (TH2F or TH2D) + // A nice way to do this is just to use TH2D::Copy into a fresh TH2F + TH2F const* test_f = dynamic_cast(this->shape()); + TH2D const* test_d = dynamic_cast(this->shape()); + if (test_f) { + test_f->Copy(res); + } else if (test_d) { + test_d->Copy(res); + } else { + throw std::runtime_error(FNERROR("TH2 shape is not a TH2F or a TH2D")); + } + return res; +} + std::ostream& Observation::PrintHeader(std::ostream &out) { std::string line = (boost::format("%-6s %-9s %-6s %-8s %-28s %-3i %-21s %-10.5g %-5i") diff --git a/CombineTools/src/Process.cc b/CombineTools/src/Process.cc index 186b21ce1f5..32a36b3742d 100644 --- a/CombineTools/src/Process.cc +++ b/CombineTools/src/Process.cc @@ -26,6 +26,7 @@ Process::Process() data_(nullptr), norm_(nullptr), cached_obs_(nullptr), + cached_obsy_(nullptr), cached_int_(nullptr) { } @@ -42,6 +43,7 @@ void swap(Process& first, Process& second) { swap(first.data_, second.data_); swap(first.norm_, second.norm_); swap(first.cached_obs_, second.cached_obs_); + swap(first.cached_obsy_, second.cached_obsy_); swap(first.cached_int_, second.cached_int_); } @@ -52,6 +54,7 @@ Process::Process(Process const& other) data_(other.data_), norm_(other.norm_), cached_obs_(other.cached_obs_), + cached_obsy_(other.cached_obsy_), cached_int_(nullptr) { TH1 *h = nullptr; if (other.shape_) { @@ -69,6 +72,7 @@ Process::Process(Process&& other) data_(nullptr), norm_(nullptr), cached_obs_(nullptr), + cached_obsy_(nullptr), cached_int_(nullptr) { swap(*this, other); } @@ -150,6 +154,38 @@ TH1F Process::ShapeAsTH1F() const { return res; } +TH2F Process::ShapeAsTH2F() const { + if (!shape_ && !data_) { + throw std::runtime_error( + FNERROR("Process object does not contain a shape")); + } + TH2F res; + if (this->shape()) { + // Need to get the shape as a concrete type (TH2F or TH1D) + // A nice way to do this is just to use TH1D::Copy into a fresh TH2F + TH2F const* test_f = dynamic_cast(this->shape()); + TH2D const* test_d = dynamic_cast(this->shape()); + if (test_f) { + test_f->Copy(res); + } else if (test_d) { + test_d->Copy(res); + } else { + throw std::runtime_error(FNERROR("TH2 shape is not a TH2F or a TH2D")); + } + } else if (this->data()) { + RooArgSet* temp_vars = (RooArgSet*)this->data()->get()->Clone(); + RooRealVar temp_xvar = *(RooRealVar*)temp_vars->first(); + temp_vars->remove(temp_xvar,true,true); + RooRealVar temp_yvar = *(RooRealVar*)temp_vars->first(); + TH2F *tmp = dynamic_cast(this->data()->createHistogram("", + temp_xvar,RooFit::YVar(temp_yvar))); + res = *tmp; + delete tmp; + if (res.Integral() > 0.) res.Scale(1. / res.Integral()); + } + return res; +} + std::ostream& Process::PrintHeader(std::ostream& out) { std::string line = (boost::format( diff --git a/CombineTools/src/Utilities.cc b/CombineTools/src/Utilities.cc index 0a6ca3143d3..7d290f7468e 100644 --- a/CombineTools/src/Utilities.cc +++ b/CombineTools/src/Utilities.cc @@ -188,6 +188,18 @@ TH1F RestoreBinning(TH1F const& src, TH1F const& ref) { return res; } +TH2F RestoreBinning2D(TH2F const& src, TH2F const& ref) { + TH2F res = ref; + res.Reset(); + for (int y = 1; y <= res.GetNbinsY(); ++y) { + for (int x = 1; x <= res.GetNbinsX(); ++x) { + res.SetBinContent(x,y, src.GetBinContent(x,y)); + res.SetBinError(x,y, src.GetBinError(x,y)); + } + } + return res; +} + std::vector> GenerateCombinations( std::vector vec) { unsigned n = vec.size();