Skip to content

Commit

Permalink
Merge branch 'columnflow:master' into run3_working_branch
Browse files Browse the repository at this point in the history
  • Loading branch information
haddadanas authored Jul 15, 2024
2 parents fc58b94 + 3be6e1b commit 9fec825
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 20 deletions.
11 changes: 5 additions & 6 deletions columnflow/config_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
from collections import OrderedDict

import law
import order
od = order
import order as od

from columnflow.util import maybe_import
from columnflow.types import Callable, Any, Sequence
Expand Down Expand Up @@ -71,7 +70,7 @@ def get_events_from_categories(
return events[mask]


def get_root_processes_from_campaign(campaign: order.config.Campaign) -> order.unique.UniqueObjectIndex:
def get_root_processes_from_campaign(campaign: od.config.Campaign) -> od.unique.UniqueObjectIndex:
"""
Extracts all root process objects from datasets contained in an order *campaign* and returns
them in a unique object index.
Expand Down Expand Up @@ -100,12 +99,12 @@ def get_root_processes_from_campaign(campaign: order.config.Campaign) -> order.u


def get_datasets_from_process(
config: order.config.Config,
process: str | order.process.Process,
config: od.config.Config,
process: str | od.process.Process,
strategy: str = "inclusive",
only_first: bool = True,
check_deep: bool = False,
) -> list[order.dataset.Dataset]:
) -> list[od.dataset.Dataset]:
r"""Given a *process* and the *config* it belongs to, returns a list of order dataset objects that
contain matching processes. This is done by walking through *process* and its child processes
and checking whether they are contained in known datasets. *strategy* controls how possible
Expand Down
4 changes: 2 additions & 2 deletions columnflow/production/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ def normalization_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Arra
)

# read the cross section per process from the lookup table
xs = np.array(self.xs_table[0, process_id].todense())[0]
xs = np.squeeze(np.asarray(self.xs_table[0, process_id].todense()))

# read the sum of event weights per process from the lookup table
sum_weights = np.array(self.sum_weights_table[0, process_id].todense())[0]
sum_weights = np.squeeze(np.asarray(self.sum_weights_table[0, process_id].todense()))

# compute the weight and store it
norm_weight = events.mc_weight * lumi * xs / sum_weights
Expand Down
139 changes: 139 additions & 0 deletions columnflow/selection/cms/jets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# coding: utf-8

"""
Selection modules for jets.
"""

from __future__ import annotations

import law
import math

from columnflow.util import maybe_import, InsertableDict
from columnflow.columnar_util import set_ak_column, optional_column as optional
from columnflow.selection import Selector, SelectionResult, selector

np = maybe_import("numpy")
ak = maybe_import("awkward")

logger = law.logger.get_logger(__name__)


@selector(
    uses={
        "Jet.{pt,eta,phi,mass,jetId,chEmEF}",
        "Muon.{pt,eta,phi,mass,isPFcand}",
        optional("Jet.puId"),
    },
    produces={"Jet.veto_maps"},
    get_veto_map_file=(lambda self, external_files: external_files.jet_veto_map),
)
def jet_veto_map(
    self: Selector,
    events: ak.Array,
    **kwargs,
) -> tuple[ak.Array, SelectionResult]:
    """
    Selector that applies the Jet Veto Map to the jets and stores the result as a new column ``Jet.veto_maps``.
    Additionally, the ``jet_veto_map`` step is added to the SelectionResult that masks events containing
    jets from the veto map, which is the recommended way to use the veto map.
    For users that only want to remove the jets from the veto map, the ``veto_map_jet_mask`` aux entry
    is added to the SelectionResult.

    Requires an external file in the config under ``jet_veto_map``:

    .. code-block:: python

        cfg.x.external_files = DotDict.wrap({
            "jet_veto_map": ("/afs/cern.ch/user/m/mfrahm/public/mirrors/jsonpog-integration-a332cfa/POG/JME/2022_Summer22EE/jetvetomaps.json.gz", "v1"),  # noqa
        })

    *get_veto_map_file* can be adapted in a subclass in case it is stored differently in the external files.

    documentation: https://cms-jerc.web.cern.ch/Recommendations/#jet-veto-maps

    :param events: array of events to evaluate.
    :return: tuple of the (possibly augmented) events array and a :py:class:`SelectionResult` with
        the ``jet_veto_map`` step (True for events to keep) and the per-jet ``veto_map_jet_mask`` aux.
    """
    jet = events.Jet
    # only PF-candidate muons are used for the jet-muon cross cleaning
    muon = events.Muon[events.Muon.isPFcand]

    # jets considered by the veto map: pt > 15 GeV, tight id, low charged EM fraction,
    # and no PF muon within dR < 0.2
    jet_mask = (
        (jet.pt > 15) &
        (jet.jetId >= 2) &
        (jet.chEmEF < 0.9) &
        (ak.all(events.Jet.metric_table(muon) >= 0.2, axis=2))
    )

    # apply loose Jet puId in Run 2 to jets with pt below 50 GeV
    if self.config_inst.campaign.x.year <= 2018:
        jet_pu_mask = (events.Jet.puId >= 4) | (events.Jet.pt > 50)
        jet_mask = jet_mask & jet_pu_mask

    # for some reason, math.pi is not included in the ranges, so we need to subtract a small number
    pi = math.pi - 1e-10

    # values outside [-pi, pi] are not included, so we need to wrap the phi values
    jet_phi = ak.where(np.abs(events.Jet.phi) > pi, events.Jet.phi - 2 * pi * np.sign(events.Jet.phi), events.Jet.phi)

    variable_map = {
        "type": "jetvetomap",
        "eta": jet.eta,
        "phi": jet_phi,
    }

    # build the positional inputs in the order expected by the correction object
    inputs = [variable_map[inp.name] for inp in self.veto_map.inputs]

    # apply the veto map; jets failing the preselection mask get a sentinel value of -1
    veto_map_result = ak.where(
        jet_mask,
        self.veto_map(*inputs),
        -1,
    )

    # per-jet veto decision (events containing such a jet should be vetoed)
    veto_map_jet_mask = (veto_map_result > 0)

    if self.config_inst.campaign.x("postfix", "").lower() == "bpix":
        # in postBPix, we need to run the veto map with type=jetvetomap_bpix and subtract this from
        # the result of the nominal jet veto map
        raise NotImplementedError("Jet Veto Map for 2023 postBPix not implemented yet")

    # add the veto map result to the events
    events = set_ak_column(events, "Jet.veto_maps", veto_map_result)
    results = SelectionResult(
        # NOTE(review): selection steps are True for events to *keep*; the previous
        # ">= 1" kept exactly the events containing a vetoed jet, i.e. the inverse of
        # the intent stated above — events without any vetoed jet pass the step
        steps={"jet_veto_map": ak.sum(veto_map_jet_mask, axis=1) == 0},
        aux={"veto_map_jet_mask": veto_map_jet_mask},
    )

    return events, results


@jet_veto_map.requires
def jet_veto_map_requires(self: Selector, reqs: dict) -> None:
    """Add the external-files bundle to *reqs* unless it is already registered."""
    if "external_files" not in reqs:
        # deferred import to avoid a circular dependency at module load time
        from columnflow.tasks.external import BundleExternalFiles
        reqs["external_files"] = BundleExternalFiles.req(self.task)


@jet_veto_map.setup
def jet_veto_map_setup(
    self: Selector,
    reqs: dict,
    inputs: dict,
    reader_targets: InsertableDict,
) -> None:
    """
    Load the jet veto map correction from the bundled external files and store it as
    ``self.veto_map`` for use in the selector.

    :param reqs: resolved requirements; must contain the ``external_files`` bundle.
    :param inputs: task inputs (unused here).
    :param reader_targets: targets to be read by the task (unused here).
    :raises ValueError: if the correction set does not contain exactly one correction.
    """
    bundle = reqs["external_files"]

    # create the corrector
    import correctionlib

    # NOTE(review): this monkey-patches correctionlib *globally*, making every
    # Correction instance callable; harmless if done repeatedly, but it affects all
    # other users of correctionlib in the same process
    correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate

    # the file is gzip-compressed json, so load and decode it before parsing
    correction_set = correctionlib.CorrectionSet.from_string(
        self.get_veto_map_file(bundle.files).load(formatter="gzip").decode("utf-8"),
    )

    # the veto map file is expected to hold a single correction object
    keys = list(correction_set.keys())
    if len(keys) != 1:
        raise ValueError(f"Expected exactly one correction in the file, got {len(keys)}")

    self.veto_map = correction_set[keys[0]]
2 changes: 1 addition & 1 deletion columnflow/selection/cms/json_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def json_filter(
lookup_result = self.run_ls_lookup[run, ls].todense()

# remove extra dimensions
lookup_result = np.squeeze(np.array(lookup_result))
lookup_result = np.squeeze(np.asarray(lookup_result))

# reject out-of-bounds entries
lookup_result = ak.where(out_of_bounds, False, lookup_result)
Expand Down
2 changes: 1 addition & 1 deletion columnflow/tasks/framework/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def my_inference_model(self):

# interpret missing parameters (e.g. NO_STR) as None
# (special case: an empty string is usually an active decision, but counts as missing too)
if law.is_no_param(param) or resolve_default or param == "":
if law.is_no_param(param) or resolve_default or param == "" or param == ():
param = None

# actual resolution
Expand Down
20 changes: 11 additions & 9 deletions columnflow/tasks/framework/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ class CalibratorsMixin(ConfigTask):
calibrators = law.CSVParameter(
default=(RESOLVE_DEFAULT,),
description="comma-separated names of calibrators to be applied; default: value of the "
"'default_calibrator' config in a 1-tuple",
"'default_calibrator' config",
brace_expand=True,
parse_empty=True,
)
Expand Down Expand Up @@ -588,17 +588,17 @@ def find_keep_columns(self: ConfigTask, collection: ColumnCollection) -> set[Rou


class SelectorStepsMixin(SelectorMixin):
"""Mixin to include multiple selector steps into tasks.
"""
Mixin to include multiple selector steps into tasks.
Inheriting from this mixin will allow a task to access selector steps,
which can be a comma-separated list of selector step names and is an input
parameter for this task.
Inheriting from this mixin will allow a task to access selector steps, which can be a
comma-separated list of selector step names and is an input parameter for this task.
"""

selector_steps = law.CSVParameter(
default=(),
default=(RESOLVE_DEFAULT,),
description="a subset of steps of the selector to apply; uses all steps when empty; "
"empty default",
"default: value of the 'default_selector_steps' config",
brace_expand=True,
parse_empty=True,
)
Expand Down Expand Up @@ -881,7 +881,8 @@ class ProducersMixin(ConfigTask):

producers = law.CSVParameter(
default=(RESOLVE_DEFAULT,),
description="comma-separated names of producers to be applied; empty default",
description="comma-separated names of producers to be applied; default: value of the "
"'default_producer' config",
brace_expand=True,
parse_empty=True,
)
Expand Down Expand Up @@ -1591,7 +1592,8 @@ class MLModelsMixin(ConfigTask):

ml_models = law.CSVParameter(
default=(RESOLVE_DEFAULT,),
description="comma-separated names of ML models to be applied; empty default",
description="comma-separated names of ML models to be applied; default: value of the "
"'default_ml_model' config",
brace_expand=True,
parse_empty=True,
)
Expand Down
6 changes: 6 additions & 0 deletions columnflow/tasks/framework/remote_bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ bootstrap_htcondor_standalone() {
local lcg_setup="{{cf_remote_lcg_setup}}"
lcg_setup="${lcg_setup:-/cvmfs/grid.cern.ch/alma9-ui-test/etc/profile.d/setup-alma9-test.sh}"

# temporary fix for missing voms/x509 variables in the lcg setup
export X509_CERT_DIR="/cvmfs/grid.cern.ch/etc/grid-security/certificates"
export X509_VOMS_DIR="/cvmfs/grid.cern.ch/etc/grid-security/vomsdir"
export X509_VOMSES="/cvmfs/grid.cern.ch/etc/grid-security/vomses"
export VOMS_USERCONF="/cvmfs/grid.cern.ch/etc/grid-security/vomses"

# fallback to a default path when the externally given software base is empty or inaccessible
local fetch_software="true"
if [ -z "${CF_SOFTWARE_BASE}" ]; then
Expand Down
7 changes: 7 additions & 0 deletions columnflow/tasks/reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,13 @@ def run(self):
return self._yield_dynamic_deps()


ProvideReducedEventsWrapper = wrapper_factory(
base_cls=AnalysisTask,
require_cls=ProvideReducedEvents,
enable=["configs", "skip_configs", "datasets", "skip_datasets", "shifts", "skip_shifts"],
)


class ReducedEventsUser(
SelectorStepsMixin,
CalibratorsMixin,
Expand Down
2 changes: 1 addition & 1 deletion modules/order

0 comments on commit 9fec825

Please sign in to comment.