Skip to content

Commit

Permalink
Merge branch 'master' of github.com:columnflow/columnflow into docs_e…
Browse files Browse the repository at this point in the history
…xample_py

Conflicts:
	columnflow/tasks/framework/mixins.py
  • Loading branch information
haddadanas committed May 23, 2024
2 parents 61bf212 + da009d3 commit d60f8ec
Show file tree
Hide file tree
Showing 26 changed files with 929 additions and 183 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,14 @@
Configuration of the __cf_analysis_name__ analysis.
"""

import functools

import law
import order as od
from scinum import Number

from columnflow.util import DotDict, maybe_import
from columnflow.columnar_util import EMPTY_FLOAT, ColumnCollection
from columnflow.config_util import (
get_root_processes_from_campaign, add_shift_aliases, get_shifts_from_sources, add_category,
verify_config_processes,
get_root_processes_from_campaign, add_shift_aliases, add_category, verify_config_processes,
)

ak = maybe_import("awkward")
Expand Down Expand Up @@ -112,6 +109,7 @@
cfg.x.default_calibrator = "example"
cfg.x.default_selector = "example"
cfg.x.default_producer = "example"
cfg.x.default_weight_producer = "example"
cfg.x.default_ml_model = None
cfg.x.default_inference_model = "example"
cfg.x.default_categories = ("incl",)
Expand Down Expand Up @@ -258,13 +256,6 @@
},
})

# event weight columns as keys in an OrderedDict, mapped to shift instances they depend on
get_shifts = functools.partial(get_shifts_from_sources, cfg)
cfg.x.event_weights = DotDict({
"normalization_weight": [],
"muon_weight": get_shifts("mu"),
})

# versions per task family, either referring to strings or to callables receving the invoking
# task instance and parameters to be passed to the task family
cfg.x.versions = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# coding: utf-8
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# coding: utf-8

"""
Example event weight producer.
"""

from columnflow.weight import WeightProducer, weight_producer
from columnflow.util import maybe_import
from columnflow.config_util import get_shifts_from_sources
from columnflow.columnar_util import Route

ak = maybe_import("awkward")
np = maybe_import("numpy")


@weight_producer(
# both used columns and dependent shifts are defined in init below
# only run on mc
mc_only=True,
)
def example(self: WeightProducer, events: ak.Array, **kwargs) -> ak.Array:
# build the full event weight
weight = ak.Array(np.ones(len(events), dtype=np.float32))
for column in self.weight_columns:
weight = weight * Route(column).apply(events)

return events, weight


@example.init
def example_init(self: WeightProducer) -> None:
# store column names referring to weights to multiply
self.weight_columns = {
"normalization_weight",
"muon_weight",
}
self.uses |= self.weight_columns

# declare shifts that the produced event weight depends on
shift_sources = {
"mu",
}
self.shifts |= set(get_shifts_from_sources(self.config_inst, *shift_sources))
6 changes: 6 additions & 0 deletions columnflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@
logger.debug(f"loading production module '{m}'")
maybe_import(m.strip())

import columnflow.weight # noqa
if law.config.has_option("analysis", "weight_production_modules"):
for m in law.config.get_expanded("analysis", "weight_production_modules", [], split_csv=True):
logger.debug(f"loading weight production module '{m}'")
maybe_import(m.strip())

import columnflow.calibration # noqa
if law.config.has_option("analysis", "calibration_modules"):
for m in law.config.get_expanded("analysis", "calibration_modules", [], split_csv=True):
Expand Down
13 changes: 12 additions & 1 deletion columnflow/plotting/plot_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import order as od

from columnflow.util import maybe_import, try_int
from columnflow.util import maybe_import, try_int, try_complex

math = maybe_import("math")
hist = maybe_import("hist")
Expand Down Expand Up @@ -122,6 +122,17 @@ def apply_variable_settings(
h = h[{var_inst.name: hist.rebin(rebin_factor)}]
hists[proc_inst] = h

slices = getattr(var_inst, "slice", None) or var_inst.x("slice", None)
if (
slices and isinstance(slices, Iterable) and len(slices) >= 2 and
try_complex(slices[0]) and try_complex(slices[1])
):
slice_0 = int(slices[0]) if try_int(slices[0]) else complex(slices[0])
slice_1 = int(slices[1]) if try_int(slices[1]) else complex(slices[1])
for proc_inst, h in list(hists.items()):
h = h[{var_inst.name: slice(slice_0, slice_1)}]
hists[proc_inst] = h

return hists


Expand Down
3 changes: 3 additions & 0 deletions columnflow/production/categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,6 @@ def category_ids_init(self: Producer) -> None:
self.produces.add(categorizer)

self.categorizer_map[cat_inst].append(categorizer)

# cast to normal dict to prevent silent failures on KeyError
self.categorizer_map = dict(self.categorizer_map)
133 changes: 123 additions & 10 deletions columnflow/production/cms/btag.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,21 @@

from __future__ import annotations

import law

from columnflow.production import Producer, producer
from columnflow.util import maybe_import, InsertableDict
from columnflow.columnar_util import set_ak_column, flat_np_view, layout_ak_array

np = maybe_import("numpy")
ak = maybe_import("awkward")

logger = law.logger.get_logger(__name__)


@producer(
uses={
"Jet.hadronFlavour", "Jet.eta", "Jet.pt", "Jet.btagDeepFlavB",
"Jet.hadronFlavour", "Jet.eta", "Jet.pt",
},
# only run on mc
mc_only=True,
Expand All @@ -29,6 +33,8 @@ def btag_weights(
self: Producer,
events: ak.Array,
jet_mask: ak.Array | type(Ellipsis) = Ellipsis,
negative_b_score_action: str = "ignore",
negative_b_score_log_mode: str = "warning",
**kwargs,
) -> ak.Array:
"""
Expand All @@ -44,31 +50,90 @@ def btag_weights(
*get_btag_file* can be adapted in a subclass in case it is stored differently in the external
files.
The name of the correction set as well as a list of JEC uncertainty sources which should be
propagated through the weight calculation should be given as an auxiliary entry in the config:
The name of the correction set, a list of JEC uncertainty sources which should be
propagated through the weight calculation, and the column used for b-tagging should
be given as an auxiliary entry in the config:
.. code-block:: python
cfg.x.btag_sf = ("deepJet_shape", ["Absolute", "FlavorQCD", ...])
cfg.x.btag_sf = ("deepJet_shape", ["Absolute", "FlavorQCD", ...], "btagDeepFlavB")
*get_btag_config* can be adapted in a subclass in case it is stored differently in the config.
Optionally, a *jet_mask* can be supplied to compute the scale factor weight based only on a
subset of jets.
The *negative_b_score_action* defines the procedure of how to handle jets with a negative b-tag.
Supported modes are:
- "ignore": the *jet_mask* is extended to exclude jets with b_score < 0
- "remove": the btag_weight is set to 0 for jets with b_score < 0
- "raise": an exception is raised
The verbosity of the handling of jets with negative b-score can be
set via *negative_b_score_log_mode*, which offers the following options:
- ``"none"``: no message is given
- ``"info"``: a `logger.info` message is given
- ``"debug"``: a `logger.debug` message is given
- ``"warning"``: a `logger.warning` message is given
Resources:
- https://twiki.cern.ch/twiki/bin/view/CMS/BTagShapeCalibration?rev=26
- https://indico.cern.ch/event/1096988/contributions/4615134/attachments/2346047/4000529/Nov21_btaggingSFjsons.pdf
"""
known_actions = ("ignore", "remove", "raise")
if negative_b_score_action not in known_actions:
raise ValueError(
f"unknown negative_b_score_action '{negative_b_score_action}', "
f"known values are {','.join(known_actions)}",
)

known_log_modes = ("none", "info", "debug", "warning")
if negative_b_score_log_mode not in known_log_modes:
raise ValueError(
f"unknown negative_b_score_log_mode '{negative_b_score_log_mode}', "
f"known values are {','.join(known_log_modes)}",
)

# get the total number of jets in the chunk
n_jets_all = ak.sum(ak.num(events.Jet, axis=1))

# check that the b-tag score is not negative for all jets considered in the SF calculation
jets_negative_b_score = events.Jet[self.b_score_column][jet_mask] < 0
if ak.any(jets_negative_b_score):
msg_func = {
"none": lambda msg: None,
"info": logger.info,
"warning": logger.warning,
"debug": logger.debug,
}[negative_b_score_log_mode]
msg = f"In dataset {self.dataset_inst.name}, {ak.sum(jets_negative_b_score)} jets have a negative b-tag score."

if negative_b_score_action == "ignore":
msg_func(
f"{msg} The *jet_mask* will be adjusted to exclude these jets, resulting in a "
"*btag_weight* of 1 for these jets.",
)
elif negative_b_score_action == "remove":
msg_func(
f"{msg} The *btag_weight* will be set to 0 for these jets.",
)
elif negative_b_score_action == "raise":
raise Exception(msg)

# set jet mask to False when b_score is negative
if jet_mask is Ellipsis:
jet_mask = (events.Jet[self.b_score_column] >= 0)
else:
jet_mask = jet_mask & (events.Jet[self.b_score_column] >= 0)

# get flat inputs, evaluated at jet_mask
flavor = flat_np_view(events.Jet.hadronFlavour[jet_mask], axis=1)
abs_eta = flat_np_view(abs(events.Jet.eta[jet_mask]), axis=1)
pt = flat_np_view(events.Jet.pt[jet_mask], axis=1)
b_discr = flat_np_view(events.Jet.btagDeepFlavB[jet_mask], axis=1)
b_discr = flat_np_view(events.Jet[self.b_score_column][jet_mask], axis=1)

# helper to create and store the weight
def add_weight(syst_name, syst_direction, column_name):
Expand Down Expand Up @@ -102,6 +167,11 @@ def add_weight(syst_name, syst_direction, column_name):

# enforce the correct shape and create the product over all jets per event
sf = layout_ak_array(sf_flat_all, events.Jet.pt)

if negative_b_score_log_mode == "remove":
# set the weight to 0 for jets with negative btag score
sf = ak.where(jets_negative_b_score, 0, sf)

weight = ak.prod(sf, axis=1, mask_identity=False)

# save the new column
Expand All @@ -121,6 +191,14 @@ def add_weight(syst_name, syst_direction, column_name):
direction,
f"btag_weight_{name}_{direction}",
)
if syst_name in ["cferr1", "cferr2"]:
# for c flavor uncertainties, multiply the uncertainty with the nominal btag weight
events = set_ak_column(
events,
f"btag_weight_{name}_{direction}",
events.btag_weight * events[f"btag_weight_{name}_{direction}"],
value_type=np.float32,
)
elif self.shift_is_known_jec_source:
# TODO: year dependent jec variations fully covered?
events = add_weight(
Expand All @@ -135,6 +213,40 @@ def add_weight(syst_name, syst_direction, column_name):
return events


def get_b_score_column(btag_config: tuple[str, list, str] | tuple[str, list]) -> str:
"""
Helper function to resolve the btag score column from the btag configuration.
:param btag_config: Entry in auxiliary `config_inst.x.btag_sf`, see example
:py:meth:`~columflow.production.cms.btag.btag_weights`. If tuple has less
than 3 entries, the column name is derived from the name of the correction set.
:returns: Name of column that is required for the calculation of this set of corrections.
"""
corrector_name = btag_config[0]
if len(btag_config) >= 3:
b_score_column = btag_config[2]
else:
# resolve the column name from the corrector name
if "deepjet" in corrector_name.lower():
b_score_column = "btagDeepFlavB"
elif "particlenet" in corrector_name.lower():
b_score_column = "btagPNetB"
else:
raise NotImplementedError(f"Cannot automatically determine btag column for Corrector '{corrector_name}'")
logger.info(f"No btag column specified; defaulting to column '{b_score_column}'")

# warn about potentially wrong column usage
if (
"deepjet" in corrector_name.lower() and "pnet" in b_score_column or
"particlenet" in corrector_name.lower() and "deepflav" in b_score_column
):
logger.warning(
f"Using btag column '{b_score_column}' for BTag Corrector '{corrector_name}' is highly discouraged",
)

return b_score_column


@btag_weights.init
def btag_weights_init(self: Producer) -> None:
# depending on the requested shift_inst, there are three cases to handle:
Expand All @@ -143,6 +255,11 @@ def btag_weights_init(self: Producer) -> None:
# 2. when the nominal shift is requested, the central weight and all variations related to the
# method-intrinsic shifts are produced
# 3. when any other shift is requested, only create the central weight column
btag_config = self.get_btag_config()
self.b_score_column = get_b_score_column(btag_config)

self.uses.add(f"Jet.{self.b_score_column}")

shift_inst = getattr(self, "local_shift_inst", None)
if not shift_inst:
return
Expand All @@ -152,7 +269,7 @@ def btag_weights_init(self: Producer) -> None:
btag_sf_jec_source = "" if self.jec_source == "Total" else self.jec_source
self.shift_is_known_jec_source = (
self.jec_source and
btag_sf_jec_source in self.get_btag_config()[1]
btag_sf_jec_source in btag_config[1]
)

# save names of method-intrinsic uncertainties
Expand Down Expand Up @@ -210,7 +327,3 @@ def btag_weights_setup(
)
corrector_name = self.get_btag_config()[0]
self.btag_sf_corrector = correction_set[corrector_name]

# check versions
if self.btag_sf_corrector.version not in (3,):
raise Exception(f"unsuppprted btag sf corrector version {self.btag_sf_corrector.version}")
Loading

0 comments on commit d60f8ec

Please sign in to comment.