Skip to content

Commit

Permalink
Merge branch 'master' of github.com:haddadanas/columnflow into Docs
Browse files Browse the repository at this point in the history
  • Loading branch information
haddadanas committed Dec 19, 2023
2 parents 9bb6809 + f901c9e commit 2099233
Show file tree
Hide file tree
Showing 20 changed files with 929 additions and 148 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ data
.data
.law
.setups
.mypy_cache
.vscode
1 change: 0 additions & 1 deletion .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ build:
pre_create_environment:
- bash setup.sh ""


submodules:
include: all
recursive: true
Expand Down
2 changes: 2 additions & 0 deletions analysis_templates/cms_minimal/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ data
.data
.law
.setups
.mypy_cache
.vscode
3 changes: 1 addition & 2 deletions analysis_templates/cms_minimal/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ setup___cf_short_name_lc__() {
CF_SKIP_SETUP="1" source "${CF_BASE}/setup.sh" "" || return "$?"

# interactive setup
if [ "${CF_REMOTE_JOB}" != "1" ]; then
if [ "${CF_REMOTE_ENV}" != "1" ]; then
cf_setup_interactive_body() {
# pre-export the CF_FLAVOR which will be cms
export CF_FLAVOR="cms"
Expand All @@ -88,7 +88,6 @@ setup___cf_short_name_lc__() {
export CF_CONDA_BASE="${CF_CONDA_BASE:-${CF_SOFTWARE_BASE}/conda}"
export CF_VENV_BASE="${CF_VENV_BASE:-${CF_SOFTWARE_BASE}/venvs}"
export CF_CMSSW_BASE="${CF_CMSSW_BASE:-${CF_SOFTWARE_BASE}/cmssw}"
export CF_CI_JOB="$( [ "${GITHUB_ACTIONS}" = "true" ] && echo 1 || echo 0 )"


#
Expand Down
136 changes: 81 additions & 55 deletions columnflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,31 @@
m = re.match(r"^(\d+)\.(\d+)\.(\d+)(-.+)?$", __version__)
version = tuple(map(int, m.groups()[:3])) + (m.group(4),)

# cf flavor
# Environment flags, evaluated once at import time. Each flag is read from an environment variable
# and converted with law.util.flag_to_bool; a missing variable defaults to "0".

#: Boolean denoting whether the environment is in a local environment (based on ``CF_LOCAL_ENV``).
env_is_local = law.util.flag_to_bool(os.getenv("CF_LOCAL_ENV", "0"))

#: Boolean denoting whether the environment is in a remote job (based on ``CF_REMOTE_ENV``).
env_is_remote = law.util.flag_to_bool(os.getenv("CF_REMOTE_ENV", "0"))

#: Boolean denoting whether the environment is in a remote job on the WLCG (based on ``CF_ON_GRID``).
env_is_grid = law.util.flag_to_bool(os.getenv("CF_ON_GRID", "0"))

#: Boolean denoting whether the environment is in a remote job on a HTCondor cluster (based on ``CF_ON_HTCONDOR``).
env_is_htcondor = law.util.flag_to_bool(os.getenv("CF_ON_HTCONDOR", "0"))

#: Boolean denoting whether the environment is in a remote job on a Slurm cluster (based on ``CF_ON_SLURM``).
env_is_slurm = law.util.flag_to_bool(os.getenv("CF_ON_SLURM", "0"))

#: Boolean denoting whether the environment is in a CI env (based on ``CF_CI_ENV``).
env_is_ci = law.util.flag_to_bool(os.getenv("CF_CI_ENV", "0"))

# The membership test must use the same key that is read afterwards ("CF_RTD_ENV"); otherwise an
# explicitly set CF_RTD_ENV would be ignored in favor of READTHEDOCS.
#: Boolean denoting whether the environment is in a readthedocs env (based on ``CF_RTD_ENV``, or ``READTHEDOCS``).
env_is_rtd = law.util.flag_to_bool(
    os.getenv("CF_RTD_ENV" if "CF_RTD_ENV" in os.environ else "READTHEDOCS", "0"),
)

#: Boolean denoting whether the environment is used for development (based on ``CF_DEV``). Always
#: false when ``env_is_remote`` is true.
env_is_dev = not env_is_remote and law.util.flag_to_bool(os.getenv("CF_DEV", "0"))

#: String referring to the "flavor" of the cf setup.
flavor = os.getenv("CF_FLAVOR")
if isinstance(flavor, str):
flavor = flavor.lower()
Expand All @@ -40,57 +64,59 @@
# some core tasks (BundleCMSSW) need the cms contrib package, to be refactored, see #155
law.contrib.load("cms")

# Package-level initialization: the statements below are top-level code of columnflow/__init__.py.
# They read the law config to create file system objects and to import the configured modules.

# initialize wlcg file systems once so that their cache cleanup is triggered if configured
if law.config.has_option("outputs", "wlcg_file_systems"):
# one WLCGFileSystem per comma-separated entry of the option; the list is bound to a module-level name
wlcg_file_systems = [
law.wlcg.WLCGFileSystem(fs.strip())
for fs in law.config.get_expanded("outputs", "wlcg_file_systems", [], split_csv=True)
]

# initialize producers, calibrators, selectors, categorizers, ml models and stat models
# NOTE(review): maybe_import comes from columnflow.util; how it reacts to a failing import is not
# visible here - confirm before relying on it
from columnflow.util import maybe_import

# each section below follows the same pattern: import the columnflow base package first, then
# every module listed (comma-separated) in the corresponding option of the "analysis" config section
import columnflow.production  # noqa
if law.config.has_option("analysis", "production_modules"):
for m in law.config.get_expanded("analysis", "production_modules", [], split_csv=True):
logger.debug(f"loading production module '{m}'")
maybe_import(m.strip())

import columnflow.calibration  # noqa
if law.config.has_option("analysis", "calibration_modules"):
for m in law.config.get_expanded("analysis", "calibration_modules", [], split_csv=True):
logger.debug(f"loading calibration module '{m}'")
maybe_import(m.strip())

import columnflow.selection  # noqa
if law.config.has_option("analysis", "selection_modules"):
for m in law.config.get_expanded("analysis", "selection_modules", [], split_csv=True):
logger.debug(f"loading selection module '{m}'")
maybe_import(m.strip())

import columnflow.categorization  # noqa
if law.config.has_option("analysis", "categorization_modules"):
for m in law.config.get_expanded("analysis", "categorization_modules", [], split_csv=True):
logger.debug(f"loading categorization module '{m}'")
maybe_import(m.strip())

import columnflow.ml  # noqa
if law.config.has_option("analysis", "ml_modules"):
for m in law.config.get_expanded("analysis", "ml_modules", [], split_csv=True):
logger.debug(f"loading ml module '{m}'")
maybe_import(m.strip())

import columnflow.inference  # noqa
if law.config.has_option("analysis", "inference_modules"):
for m in law.config.get_expanded("analysis", "inference_modules", [], split_csv=True):
logger.debug(f"loading inference module '{m}'")
maybe_import(m.strip())

# preload all task modules so that task parameters are globally known and accepted
# (here the option names of the "modules" section themselves are the modules to import)
if law.config.has_section("modules"):
for m in law.config.options("modules"):
logger.debug(f"loading task module '{m}'")
maybe_import(m.strip())

# cleanup
# drop the helper name "m", which is reused as the loop variable above, from the namespace
del m
# initialize various objects
# Package-level initialization in columnflow/__init__.py, guarded by the env_is_rtd flag.
# NOTE(review): indentation is not visible in this view; presumably everything down to the final
# cleanup is nested under this condition - confirm against the actual file
if not env_is_rtd:
# initialize wlcg file systems once so that their cache cleanup is triggered if configured
if law.config.has_option("outputs", "wlcg_file_systems"):
# one WLCGFileSystem per comma-separated entry of the option
wlcg_file_systems = [
law.wlcg.WLCGFileSystem(fs.strip())
for fs in law.config.get_expanded("outputs", "wlcg_file_systems", [], split_csv=True)
]

# initialize producers, calibrators, selectors, categorizers, ml models and stat models
# NOTE(review): maybe_import comes from columnflow.util; how it reacts to a failing import is not
# visible here - confirm before relying on it
from columnflow.util import maybe_import

# each section below follows the same pattern: import the columnflow base package first, then
# every module listed (comma-separated) in the corresponding option of the "analysis" config section
import columnflow.production # noqa
if law.config.has_option("analysis", "production_modules"):
for m in law.config.get_expanded("analysis", "production_modules", [], split_csv=True):
logger.debug(f"loading production module '{m}'")
maybe_import(m.strip())

import columnflow.calibration # noqa
if law.config.has_option("analysis", "calibration_modules"):
for m in law.config.get_expanded("analysis", "calibration_modules", [], split_csv=True):
logger.debug(f"loading calibration module '{m}'")
maybe_import(m.strip())

import columnflow.selection # noqa
if law.config.has_option("analysis", "selection_modules"):
for m in law.config.get_expanded("analysis", "selection_modules", [], split_csv=True):
logger.debug(f"loading selection module '{m}'")
maybe_import(m.strip())

import columnflow.categorization # noqa
if law.config.has_option("analysis", "categorization_modules"):
for m in law.config.get_expanded("analysis", "categorization_modules", [], split_csv=True):
logger.debug(f"loading categorization module '{m}'")
maybe_import(m.strip())

import columnflow.ml # noqa
if law.config.has_option("analysis", "ml_modules"):
for m in law.config.get_expanded("analysis", "ml_modules", [], split_csv=True):
logger.debug(f"loading ml module '{m}'")
maybe_import(m.strip())

import columnflow.inference # noqa
if law.config.has_option("analysis", "inference_modules"):
for m in law.config.get_expanded("analysis", "inference_modules", [], split_csv=True):
logger.debug(f"loading inference module '{m}'")
maybe_import(m.strip())

# preload all task modules so that task parameters are globally known and accepted
# (here the option names of the "modules" section themselves are the modules to import)
if law.config.has_section("modules"):
for m in law.config.options("modules"):
logger.debug(f"loading task module '{m}'")
maybe_import(m.strip())

# cleanup
# drop the helper name "m", which is reused as the loop variable above, from the namespace
del m
42 changes: 42 additions & 0 deletions columnflow/config_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,50 @@
import law
import order as od

from columnflow.util import maybe_import
from columnflow.types import Callable, Any, Sequence

ak = maybe_import("awkward")
np = maybe_import("numpy")


def get_events_from_categories(
    events: ak.Array,
    categories: Sequence[str | od.Category],
    config_inst: od.Config | None = None,
) -> ak.Array:
    """
    Helper function that returns all events from an awkward array *events* that are categorized
    into one of the leafs of one of the *categories*. A category without leaf categories is
    treated as its own leaf. When *categories* is empty, no event is selected.

    :param events: Awkward array. Requires the 'category_ids' field to be present.
    :param categories: Sequence of category instances. Can also be a sequence of strings when
        passing a *config_inst*.
    :param config_inst: Optional config instance to load category instances.
    :raises ValueError: If "category_ids" is not present in the *events* fields.
    :return: Awkward array of all events that are categorized into one of the leafs of one of the
        *categories*.
    """
    if "category_ids" not in events.fields:
        raise ValueError(
            f"{get_events_from_categories.__name__} requires the 'category_ids' field to be present",
        )

    categories = law.util.make_list(categories)
    if config_inst is not None:
        # get category insts
        categories = [config_inst.get_category(cat) for cat in categories]

    # collect the unique leaf categories; start from an empty set so that an empty *categories*
    # sequence yields an empty selection instead of a TypeError from an argument-less set.union
    leaf_category_insts = set()
    for cat in categories:
        leaf_category_insts.update(cat.get_leaf_categories() or [cat])

    # do the "or" of all leaf categories
    mask = np.zeros(len(events), dtype=bool)
    for cat in leaf_category_insts:
        # an event matches if any of its category ids (reduced along axis 1) equals the leaf id
        cat_mask = ak.any(events.category_ids == cat.id, axis=1)
        mask = cat_mask | mask

    return events[mask]


def get_root_processes_from_campaign(campaign: od.Campaign) -> od.UniqueObjectIndex:
"""
Expand Down
Loading

0 comments on commit 2099233

Please sign in to comment.