Skip to content

Commit

Permalink
Merge branch 'columnflow:master' into run3_working_branch
Browse files Browse the repository at this point in the history
  • Loading branch information
haddadanas authored Jul 15, 2024
2 parents fc58b94 + 3be6e1b commit 9fec825
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 20 deletions.
11 changes: 5 additions & 6 deletions columnflow/config_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
from collections import OrderedDict

import law
import order
od = order
import order as od

from columnflow.util import maybe_import
from columnflow.types import Callable, Any, Sequence
Expand Down Expand Up @@ -71,7 +70,7 @@ def get_events_from_categories(
return events[mask]


def get_root_processes_from_campaign(campaign: order.config.Campaign) -> order.unique.UniqueObjectIndex:
def get_root_processes_from_campaign(campaign: od.config.Campaign) -> od.unique.UniqueObjectIndex:
"""
Extracts all root process objects from datasets contained in an order *campaign* and returns
them in a unique object index.
Expand Down Expand Up @@ -100,12 +99,12 @@ def get_root_processes_from_campaign(campaign: order.config.Campaign) -> order.u


def get_datasets_from_process(
config: order.config.Config,
process: str | order.process.Process,
config: od.config.Config,
process: str | od.process.Process,
strategy: str = "inclusive",
only_first: bool = True,
check_deep: bool = False,
) -> list[order.dataset.Dataset]:
) -> list[od.dataset.Dataset]:
r"""Given a *process* and the *config* it belongs to, returns a list of order dataset objects that
contain matching processes. This is done by walking through *process* and its child processes
and checking whether they are contained in known datasets. *strategy* controls how possible
Expand Down
4 changes: 2 additions & 2 deletions columnflow/production/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ def normalization_weights(self: Producer, events: ak.Array, **kwargs) -> ak.Arra
)

# read the cross section per process from the lookup table
xs = np.array(self.xs_table[0, process_id].todense())[0]
xs = np.squeeze(np.asarray(self.xs_table[0, process_id].todense()))

# read the sum of event weights per process from the lookup table
sum_weights = np.array(self.sum_weights_table[0, process_id].todense())[0]
sum_weights = np.squeeze(np.asarray(self.sum_weights_table[0, process_id].todense()))

# compute the weight and store it
norm_weight = events.mc_weight * lumi * xs / sum_weights
Expand Down
139 changes: 139 additions & 0 deletions columnflow/selection/cms/jets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# coding: utf-8

"""
Selection modules for jets.
"""

from __future__ import annotations

import law
import math

from columnflow.util import maybe_import, InsertableDict
from columnflow.columnar_util import set_ak_column, optional_column as optional
from columnflow.selection import Selector, SelectionResult, selector

np = maybe_import("numpy")
ak = maybe_import("awkward")

logger = law.logger.get_logger(__name__)


@selector(
    uses={
        "Jet.{pt,eta,phi,mass,jetId,chEmEF}",
        "Muon.{pt,eta,phi,mass,isPFcand}",
        optional("Jet.puId"),
    },
    produces={"Jet.veto_maps"},
    get_veto_map_file=(lambda self, external_files: external_files.jet_veto_map),
)
def jet_veto_map(
    self: Selector,
    events: ak.Array,
    **kwargs,
) -> tuple[ak.Array, SelectionResult]:
    """
    Selector that applies the Jet Veto Map to the jets and stores the result as a new column ``Jet.veto_maps``.
    Additionally, the ``jet_veto_map`` step is added to the SelectionResult that masks events containing
    jets from the veto map, which is the recommended way to use the veto map.
    For users that only want to remove the jets from the veto map, the ``veto_map_jet_mask`` aux entry
    is added to the SelectionResult.

    Requires an external file in the config under ``jet_veto_map``:

    .. code-block:: python

        cfg.x.external_files = DotDict.wrap({
            "jet_veto_map": ("/afs/cern.ch/user/m/mfrahm/public/mirrors/jsonpog-integration-a332cfa/POG/JME/2022_Summer22EE/jetvetomaps.json.gz", "v1"),  # noqa
        })

    *get_veto_map_file* can be adapted in a subclass in case it is stored differently in the external files.

    documentation: https://cms-jerc.web.cern.ch/Recommendations/#jet-veto-maps

    :param events: array of events to evaluate.
    :return: tuple of the (possibly augmented) events array and a :py:class:`SelectionResult` with
        the ``jet_veto_map`` step (True for events to keep) and the per-jet ``veto_map_jet_mask`` aux.
    """
    jet = events.Jet
    # only PF-candidate muons are used for the jet-muon cross cleaning
    muon = events.Muon[events.Muon.isPFcand]

    # jets considered by the veto map: pt > 15 GeV, tight id, low charged EM fraction,
    # and no PF muon within dR < 0.2
    jet_mask = (
        (jet.pt > 15) &
        (jet.jetId >= 2) &
        (jet.chEmEF < 0.9) &
        (ak.all(events.Jet.metric_table(muon) >= 0.2, axis=2))
    )

    # apply loose Jet puId in Run 2 to jets with pt below 50 GeV
    if self.config_inst.campaign.x.year <= 2018:
        jet_pu_mask = (events.Jet.puId >= 4) | (events.Jet.pt > 50)
        jet_mask = jet_mask & jet_pu_mask

    # for some reason, math.pi is not included in the ranges, so we need to subtract a small number
    pi = math.pi - 1e-10

    # values outside [-pi, pi] are not included, so we need to wrap the phi values
    jet_phi = ak.where(np.abs(events.Jet.phi) > pi, events.Jet.phi - 2 * pi * np.sign(events.Jet.phi), events.Jet.phi)

    variable_map = {
        "type": "jetvetomap",
        "eta": jet.eta,
        "phi": jet_phi,
    }

    # build the positional inputs in the order expected by the correction object
    inputs = [variable_map[inp.name] for inp in self.veto_map.inputs]

    # apply the veto map; jets failing the preselection mask get a sentinel value of -1
    veto_map_result = ak.where(
        jet_mask,
        self.veto_map(*inputs),
        -1,
    )

    # per-jet veto decision (events containing such a jet should be vetoed)
    veto_map_jet_mask = (veto_map_result > 0)

    if self.config_inst.campaign.x("postfix", "").lower() == "bpix":
        # in postBPix, we need to run the veto map with type=jetvetomap_bpix and subtract this from
        # the result of the nominal jet veto map
        raise NotImplementedError("Jet Veto Map for 2023 postBPix not implemented yet")

    # add the veto map result to the events
    events = set_ak_column(events, "Jet.veto_maps", veto_map_result)
    results = SelectionResult(
        # NOTE(review): selection steps are True for events to *keep*; the previous
        # ">= 1" kept exactly the events containing a vetoed jet, i.e. the inverse of
        # the intent stated above — events without any vetoed jet pass the step
        steps={"jet_veto_map": ak.sum(veto_map_jet_mask, axis=1) == 0},
        aux={"veto_map_jet_mask": veto_map_jet_mask},
    )

    return events, results


@jet_veto_map.requires
def jet_veto_map_requires(self: Selector, reqs: dict) -> None:
    """Add the external-files bundle to *reqs* unless it is already registered."""
    if "external_files" not in reqs:
        # deferred import to avoid a circular dependency at module load time
        from columnflow.tasks.external import BundleExternalFiles
        reqs["external_files"] = BundleExternalFiles.req(self.task)


@jet_veto_map.setup
def jet_veto_map_setup(
    self: Selector,
    reqs: dict,
    inputs: dict,
    reader_targets: InsertableDict,
) -> None:
    """
    Load the jet veto map correction from the bundled external files and store it as
    ``self.veto_map`` for use in the selector.

    :param reqs: resolved requirements; must contain the ``external_files`` bundle.
    :param inputs: task inputs (unused here).
    :param reader_targets: targets to be read by the task (unused here).
    :raises ValueError: if the correction set does not contain exactly one correction.
    """
    bundle = reqs["external_files"]

    # create the corrector
    import correctionlib

    # NOTE(review): this monkey-patches correctionlib *globally*, making every
    # Correction instance callable; harmless if done repeatedly, but it affects all
    # other users of correctionlib in the same process
    correctionlib.highlevel.Correction.__call__ = correctionlib.highlevel.Correction.evaluate

    # the file is gzip-compressed json, so load and decode it before parsing
    correction_set = correctionlib.CorrectionSet.from_string(
        self.get_veto_map_file(bundle.files).load(formatter="gzip").decode("utf-8"),
    )

    # the veto map file is expected to hold a single correction object
    keys = list(correction_set.keys())
    if len(keys) != 1:
        raise ValueError(f"Expected exactly one correction in the file, got {len(keys)}")

    self.veto_map = correction_set[keys[0]]
2 changes: 1 addition & 1 deletion columnflow/selection/cms/json_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def json_filter(
lookup_result = self.run_ls_lookup[run, ls].todense()

# remove extra dimensions
lookup_result = np.squeeze(np.array(lookup_result))
lookup_result = np.squeeze(np.asarray(lookup_result))

# reject out-of-bounds entries
lookup_result = ak.where(out_of_bounds, False, lookup_result)
Expand Down
2 changes: 1 addition & 1 deletion columnflow/tasks/framework/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def my_inference_model(self):

# interpret missing parameters (e.g. NO_STR) as None
# (special case: an empty string is usually an active decision, but counts as missing too)
if law.is_no_param(param) or resolve_default or param == "":
if law.is_no_param(param) or resolve_default or param == "" or param == ():
param = None

# actual resolution
Expand Down
20 changes: 11 additions & 9 deletions columnflow/tasks/framework/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ class CalibratorsMixin(ConfigTask):
calibrators = law.CSVParameter(
default=(RESOLVE_DEFAULT,),
description="comma-separated names of calibrators to be applied; default: value of the "
"'default_calibrator' config in a 1-tuple",
"'default_calibrator' config",
brace_expand=True,
parse_empty=True,
)
Expand Down Expand Up @@ -588,17 +588,17 @@ def find_keep_columns(self: ConfigTask, collection: ColumnCollection) -> set[Rou


class SelectorStepsMixin(SelectorMixin):
"""Mixin to include multiple selector steps into tasks.
"""
Mixin to include multiple selector steps into tasks.
Inheriting from this mixin will allow a task to access selector steps,
which can be a comma-separated list of selector step names and is an input
parameter for this task.
Inheriting from this mixin will allow a task to access selector steps, which can be a
comma-separated list of selector step names and is an input parameter for this task.
"""

selector_steps = law.CSVParameter(
default=(),
default=(RESOLVE_DEFAULT,),
description="a subset of steps of the selector to apply; uses all steps when empty; "
"empty default",
"default: value of the 'default_selector_steps' config",
brace_expand=True,
parse_empty=True,
)
Expand Down Expand Up @@ -881,7 +881,8 @@ class ProducersMixin(ConfigTask):

producers = law.CSVParameter(
default=(RESOLVE_DEFAULT,),
description="comma-separated names of producers to be applied; empty default",
description="comma-separated names of producers to be applied; default: value of the "
"'default_producer' config",
brace_expand=True,
parse_empty=True,
)
Expand Down Expand Up @@ -1591,7 +1592,8 @@ class MLModelsMixin(ConfigTask):

ml_models = law.CSVParameter(
default=(RESOLVE_DEFAULT,),
description="comma-separated names of ML models to be applied; empty default",
description="comma-separated names of ML models to be applied; default: value of the "
"'default_ml_model' config",
brace_expand=True,
parse_empty=True,
)
Expand Down
6 changes: 6 additions & 0 deletions columnflow/tasks/framework/remote_bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ bootstrap_htcondor_standalone() {
local lcg_setup="{{cf_remote_lcg_setup}}"
lcg_setup="${lcg_setup:-/cvmfs/grid.cern.ch/alma9-ui-test/etc/profile.d/setup-alma9-test.sh}"

# temporary fix for missing voms/x509 variables in the lcg setup
export X509_CERT_DIR="/cvmfs/grid.cern.ch/etc/grid-security/certificates"
export X509_VOMS_DIR="/cvmfs/grid.cern.ch/etc/grid-security/vomsdir"
export X509_VOMSES="/cvmfs/grid.cern.ch/etc/grid-security/vomses"
export VOMS_USERCONF="/cvmfs/grid.cern.ch/etc/grid-security/vomses"

# fallback to a default path when the externally given software base is empty or inaccessible
local fetch_software="true"
if [ -z "${CF_SOFTWARE_BASE}" ]; then
Expand Down
7 changes: 7 additions & 0 deletions columnflow/tasks/reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,13 @@ def run(self):
return self._yield_dynamic_deps()


ProvideReducedEventsWrapper = wrapper_factory(
base_cls=AnalysisTask,
require_cls=ProvideReducedEvents,
enable=["configs", "skip_configs", "datasets", "skip_datasets", "shifts", "skip_shifts"],
)


class ReducedEventsUser(
SelectorStepsMixin,
CalibratorsMixin,
Expand Down
2 changes: 1 addition & 1 deletion modules/order

0 comments on commit 9fec825

Please sign in to comment.