diff --git a/docs/part3/commonstatsmethods.md b/docs/part3/commonstatsmethods.md index 0b26c26c2f1..04ac5d7d0aa 100644 --- a/docs/part3/commonstatsmethods.md +++ b/docs/part3/commonstatsmethods.md @@ -346,6 +346,9 @@ The choice of test-statistic can be made via the option `--testStat` and differe While the above shortcuts are the common variants, you can also try others. The treatment of the nuisances can be changed to the so-called "Hybrid-Bayesian" method which effectively integrates over the nuisance parameters. This is especially relevant when you have very few expected events in your data and you are using those events to constrain background processes. This can be achieved by setting `--generateNuisances=1 --generateExternalMeasurements=0`. You might also want to avoid first fitting to the data to choose the nominal values in this case, which can be done by also setting `--fitNuisances=0`. +!!! warning + If you have unconstrained parameters in your model (`rateParam` or if using a `_norm` variable for a pdf) and you want to use the "Hybrid-Bayesian" method, you **must** declare these as `flatParam` in your datacard and when running text2workspace you must add the option `--X-assign-flatParam-prior` in the command line. This will create uniform priors for these parameters, which is needed for this method and which otherwise would not get created. + !!! info Note that (observed and toy) values of the test statistic stored in the instances of `RooStats::HypoTestResult` when the option `--saveHybridResult` has been specified, are defined without the factor 2 and therefore are twice as small as the values given by the formulas above. This factor is however included automatically by all plotting script supplied within the Combine package. diff --git a/docs/part3/runningthetool.md b/docs/part3/runningthetool.md index 595ef3c0dd4..a4a93a6fa7a 100644 --- a/docs/part3/runningthetool.md +++ b/docs/part3/runningthetool.md @@ -186,7 +186,7 @@ You can turn off the internal logic by setting `--X-rtd TMCSO_AdaptivePseudoAsim #### Nuisance parameter generation -The default method of dealing with systematics is to generate random values (around their nominal values, see above) for the nuisance parameters, according to their prior pdfs centred around their default values, *before* generating the data. The *unconstrained* nuisance parameters (eg `flatParam` or `rateParam`) or those with *flat* priors are **not** randomised before the data generation. +The default method of dealing with systematics is to generate random values (around their nominal values, see above) for the nuisance parameters, according to their prior pdfs centred around their default values, *before* generating the data. The *unconstrained* nuisance parameters (eg `flatParam` or `rateParam`) or those with *flat* priors are **not** randomised before the data generation. If you wish to also randomise these parameters, you **must** declare these as `flatParam` in your datacard and when running text2workspace you must add the option `--X-assign-flatParam-prior` in the command line. The following are options which define how the toys will be generated, diff --git a/python/Datacard.py b/python/Datacard.py index a3d885dce8f..2d13048bcdd 100644 --- a/python/Datacard.py +++ b/python/Datacard.py @@ -63,6 +63,12 @@ def __init__(self): self.groups = {} self.discretes = [] + # list of parameters called _norm in user input workspace + self.pdfnorms = {} + + # collection of nuisances to auto-produce flat priors for + self.toCreateFlatParam = {} + def print_structure(self): """ Print the contents of the -> should allow for direct text2workspace on python config @@ -140,6 +146,7 @@ def print_structure(self): print("DC.binParFlags = ", self.binParFlags, "#", type(self.binParFlags)) print("DC.groups = ", self.groups, "#", type(self.groups)) print("DC.discretes = ", self.discretes, "#", type(self.discretes)) + print("DC.pdfnorms = ", self.pdfnorms, "#", type(self.pdfnorms)) print( """ diff --git a/python/DatacardParser.py b/python/DatacardParser.py index ba34b588b1d..910c06b153b 100644 --- a/python/DatacardParser.py +++ b/python/DatacardParser.py @@ -279,6 +279,13 @@ def addDatacardParserOptions(parser): default=None, help="Simplify MH dependent objects: 'fixed', 'pol' with N=0..4", ) + parser.add_option( + "--X-assign-flatParam-prior", + dest="flatParamPrior", + default=False, + action="store_true", + help="Assign RooUniform pdf for flatParam NPs", + ) def strip(l): @@ -359,6 +366,9 @@ def parseCard(file, options): if not hasattr(options, "evaluateEdits"): setattr(options, "evaluateEdits", True) + if not hasattr(options, "flatParamPrior"): + setattr(options, "flatParamPrior", False) + try: for lineNumber, l in enumerate(file): f = l.split() @@ -522,6 +532,9 @@ def parseCard(file, options): elif pdf == "flatParam": ret.flatParamNuisances[lsyst] = True # for flat parametric uncertainties, code already does the right thing as long as they are non-constant RooRealVars linked to the model + if options.flatParamPrior: + ret.systs.append([lsyst, nofloat, pdf, args, []]) + ret.add_syst_id(lsyst) continue elif pdf == "extArg": # look for additional parameters in workspaces @@ -668,7 +681,7 @@ def parseCard(file, options): syst2 = [] for lsyst, nofloat, pdf, args, errline in ret.systs: nonNullEntries = 0 - if pdf == "param" or pdf == "constr" or pdf == "discrete" or pdf == "rateParam": # this doesn't have an errline + if pdf == "param" or pdf == "constr" or pdf == "discrete" or pdf == "rateParam" or pdf == "flatParam": # this doesn't have an errline syst2.append((lsyst, nofloat, pdf, args, errline)) continue for b, p, s in ret.keyline: diff --git a/python/ModelTools.py b/python/ModelTools.py index 5a427d3664a..22afb015e7b 100644 --- a/python/ModelTools.py +++ b/python/ModelTools.py @@ -154,6 +154,14 @@ def __init__(self, datacard, options): self.selfNormBins = [] self.extraNuisances = [] self.extraGlobalObservables = [] + self.globalobs = [] + + def getSafeNormName(self, n): + # need to be careful in case user has _norm name and wants to auto-create flatPrior + if self.options.flatParamPrior: + if n in self.DC.pdfnorms.keys(): + return self.DC.pdfnorms[n] + return n def setPhysics(self, physicsModel): self.physics = physicsModel @@ -174,6 +182,8 @@ def doModel(self, justCheckPhysicsModel=False): self.doNuisances() self.doExtArgs() self.doRateParams() + self.doAutoFlatNuisancePriors() + self.doFillNuisPdfsAndSets() self.doExpectedEvents() if justCheckPhysicsModel: self.physics.done() @@ -316,6 +326,12 @@ def doRateParams(self): v = float(argv) removeRange = len(param_range) == 0 if param_range == "": + if self.options.flatParamPrior: + raise ValueError( + "Cannot create flat Prior for rateParam nuisance parameter '" + + argu + + "' without specifying a range [a,b]. Please fix in the datacard" + ) ## check range. The parameter needs to be created in range. Then we will remove it param_range = "%g,%g" % (-2.0 * abs(v), 2.0 * abs(v)) # additional check for range requested @@ -382,7 +398,7 @@ def doNuisances(self): if len(self.DC.systs) == 0: return self.doComment(" ----- nuisances -----") - globalobs = [] + # globalobs = [] for n, nofloat, pdf, args, errline in self.DC.systs: is_func_scaled = False @@ -433,7 +449,7 @@ def doNuisances(self): # Use existing constraint since it could be a param self.out.var(n).setVal(0) self.out.var(n).setError(1) - globalobs.append("%s_In" % n) + self.globalobs.append("%s_In" % n) if self.options.bin: self.out.var("%s_In" % n).setConstant(True) if self.options.optimizeBoundNuisances and not is_func_scaled: @@ -466,7 +482,7 @@ def doNuisances(self): theta, ), ) - globalobs.append("%s_In" % n) + self.globalobs.append("%s_In" % n) if self.options.bin: self.out.var("%s_In" % n).setConstant(True) elif pdf == "gmN": @@ -501,7 +517,7 @@ def doNuisances(self): "Poisson", "%s_In[%d,%f,%f], %s[%f,%f,%f], 1" % (n, args[0], minObs, maxObs, n, args[0] + 1, minExp, maxExp), ) - globalobs.append("%s_In" % n) + self.globalobs.append("%s_In" % n) if self.options.bin: self.out.var("%s_In" % n).setConstant(True) elif pdf == "trG": @@ -515,13 +531,21 @@ def doNuisances(self): trG_max = -1.0 / v r = "%f,%f" % (trG_min, trG_max) self.doObj("%s_Pdf" % n, "Gaussian", "%s[0,%s], %s_In[0,%s], 1" % (n, r, n, r)) - globalobs.append("%s_In" % n) + self.globalobs.append("%s_In" % n) if self.options.bin: self.out.var("%s_In" % n).setConstant(True) elif pdf == "lnU" or pdf == "shapeU": self.doObj("%s_Pdf" % n, "Uniform", "%s[-1,1]" % n) elif pdf == "unif": self.doObj("%s_Pdf" % n, "Uniform", "%s[%f,%f]" % (n, args[0], args[1])) + elif pdf == "flatParam" and self.options.flatParamPrior: + c_param_name = self.getSafeNormName(n) + if self.out.var(c_param_name): + v, x1, x2 = self.out.var(c_param_name).getVal(), self.out.var(c_param_name).getMin(), self.out.var(c_param_name).getMax() + self.DC.toCreateFlatParam[c_param_name] = [v, x1, x2] + else: + self.DC.toCreateFlatParam[c_param_name] = [] + elif pdf == "dFD" or pdf == "dFD2": dFD_min = -(1 + 8 / args[0]) dFD_max = +(1 + 8 / args[0]) @@ -546,7 +570,7 @@ def doNuisances(self): ROOFIT_EXPR_PDF, "'1/(2*(1+exp(%f*(@0-1)))*(1+exp(-%f*(@0+1))))', %s[0,%s], %s_In[0,%s]" % (args[0], args[0], n, r, n, r), ) - globalobs.append("%s_In" % n) + self.globalobs.append("%s_In" % n) if self.options.bin: self.out.var("%s_In" % n).setConstant(True) elif pdf == "constr": @@ -760,7 +784,7 @@ def doNuisances(self): self.out.function("%s_BoundLo" % n), self.out.function("%s_BoundHi" % n), ) - globalobs.append("%s_In" % n) + self.globalobs.append("%s_In" % n) # if self.options.optimizeBoundNuisances: self.out.var(n).setAttribute("optimizeBounds") elif pdf == "extArg": continue @@ -772,15 +796,18 @@ def doNuisances(self): # self.out.var(n).Print('V') if n in self.DC.frozenNuisances: self.out.var(n).setConstant(True) + + def doFillNuisPdfsAndSets(self): if self.options.bin: # avoid duplicating _Pdf in list setNuisPdf = [] nuisPdfs = ROOT.RooArgList() nuisVars = ROOT.RooArgSet() for n, nf, p, a, e in self.DC.systs: + c_param_name = self.getSafeNormName(n) if p != "constr": - nuisVars.add(self.out.var(n)) - setNuisPdf.append(n) + nuisVars.add(self.out.var(c_param_name)) + setNuisPdf.append(c_param_name) setNuisPdf = set(setNuisPdf) for n in setNuisPdf: nuisPdfs.add(self.out.pdf(n + "_Pdf")) @@ -789,15 +816,33 @@ def doNuisances(self): self.out.safe_import(self.out.nuisPdf) self.out.nuisPdfs = nuisPdfs gobsVars = ROOT.RooArgSet() - for g in globalobs: + for g in self.globalobs: gobsVars.add(self.out.var(g)) self.out.defineSet("globalObservables", gobsVars) else: # doesn't work for too many nuisances :-( # avoid duplicating _Pdf in list - setNuisPdf = set([n for (n, nf, p, a, e) in self.DC.systs]) - self.doSet("nuisances", ",".join(["%s" % n for (n, nf, p, a, e) in self.DC.systs])) + setNuisPdf = set([self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs]) + self.doSet("nuisances", ",".join(["%s" % self.getSafeNormName(n) for (n, nf, p, a, e) in self.DC.systs])) self.doObj("nuisancePdf", "PROD", ",".join(["%s_Pdf" % n for n in setNuisPdf])) - self.doSet("globalObservables", ",".join(globalobs)) + self.doSet("globalObservables", ",".join(self.globalobs)) + + def doAutoFlatNuisancePriors(self): + if len(self.DC.toCreateFlatParam.keys()) > 0: + for flatNP in self.DC.toCreateFlatParam.items(): + c_param_name = flatNP[0] + c_param_details = flatNP[1] + if len(c_param_details): + v, x1, x2 = c_param_details + else: + v, x1, x2 = self.out.var(c_param_name).getVal(), self.out.var(c_param_name).getMin(), self.out.var(c_param_name).getMax() + if self.options.verbose > 2: + print("Will create flat prior for parameter ", c_param_name, " with range [", x1, x2, "]") + self.doExp( + "%s_diff_expr" % c_param_name, "%s-%s_In" % (c_param_name, c_param_name), "%s,%s_In[%g,%g,%g]" % (c_param_name, c_param_name, v, x1, x2) + ) + self.doObj("%s_Pdf" % c_param_name, "Uniform", "%s_diff_expr" % c_param_name) + self.out.var("%s_In" % c_param_name).setConstant(True) + self.globalobs.append("%s_In" % c_param_name) def doNuisancesGroups(self): # Prepare a dictionary of which group a certain nuisance belongs to @@ -878,7 +923,7 @@ def doExpectedEvents(self): continue if pdf == "constr": continue - if pdf == "rateParam": + if pdf == "rateParam" or pdf == "flatParam": continue if p not in errline[b]: continue @@ -899,6 +944,7 @@ def doExpectedEvents(self): logNorms.append((errline[b][p], n)) elif pdf == "gmM": factors.append(n) + # elif pdf == "trG" or pdf == "unif" or pdf == "flatParam" or pdf == "dFD" or pdf == "dFD2": elif pdf == "trG" or pdf == "unif" or pdf == "dFD" or pdf == "dFD2": myname = "n_exp_shift_bin%s_proc_%s_%s" % (b, p, n) self.doObj(myname, ROOFIT_EXPR, "'1+%f*@0', %s" % (errline[b][p], n)) diff --git a/python/ShapeTools.py b/python/ShapeTools.py index 184112d14ab..a4ee6922c79 100644 --- a/python/ShapeTools.py +++ b/python/ShapeTools.py @@ -615,6 +615,8 @@ def prepareAllShapes(self): raise RuntimeError("There's more than once choice of observables: %s\n" % str(list(shapeObs.keys()))) self.out.binVars = list(shapeObs.values())[0] self.out.safe_import(self.out.binVars) + # keep hold of pdf_norm renaming + self.DC.pdfnorms = self.norm_rename_map.copy() def doCombinedDataset(self): if len(self.DC.bins) == 1 and self.options.forceNonSimPdf: